Find this useful? Enter your email to receive occasional updates for securing PHP code.
Signing you up...
Thank you for signing up!
PHP Decode
<?php /** Plugin Name: Crawlomatic Multipage Scraper Post Generator Plugin URI: //1.en..
Decoded Output download
<?php
/**
Plugin Name: Crawlomatic Multipage Scraper Post Generator
Plugin URI: //1.envato.market/coderevolution
Description: This plugin will generate content for you, even in your sleep using article crawling and scraping.
Author: CodeRevolution
Version: 2.6.2
Author URI: //coderevolution.ro
License: Commercial. For personal use only. Not to give away or resell.
Text Domain: crawlomatic-multipage-scraper-post-generator
*/
/*
Copyright 2016 - 2024 CodeRevolution
*/
defined('ABSPATH') or die();
require_once (dirname(__FILE__) . "/res/other/plugin-dash.php");
require_once( plugin_dir_path(__FILE__) . 'class.crawlomatic.shortcode.php' );
/**
 * Read this plugin's version out of the header comment of the current file.
 *
 * @return string The value of the "Version" header as extracted by get_file_data().
 */
function crawlomatic_get_version() {
    $wanted_headers = array( 'Version' => 'Version' );
    $headers        = get_file_data( __FILE__, $wanted_headers, false );

    return $headers['Version'];
}
/**
 * Load the plugin's translations from the "languages" folder that sits
 * next to this file.
 *
 * @return void
 */
function crawlomatic_load_textdomain() {
    $plugin_folder = basename( dirname( __FILE__ ) );
    $languages_dir = $plugin_folder . '/languages';

    load_plugin_textdomain( 'crawlomatic-multipage-scraper-post-generator', false, $languages_dir );
}
add_action( 'init', 'crawlomatic_load_textdomain' );
/**
 * Convert a string to UTF-8 using mbstring, when that extension is available.
 *
 * The source encoding is guessed with mb_detect_encoding(); if detection fails
 * the conversion is done without an explicit source encoding.
 *
 * @param string $str Text to convert.
 * @return string The converted text, or the input unchanged when mbstring is missing.
 */
function crawlomatic_utf8_encode($str)
{
    // No mbstring: nothing can be converted, hand the input back as-is.
    if (!function_exists('mb_detect_encoding') || !function_exists('mb_convert_encoding')) {
        return $str;
    }

    $detected = mb_detect_encoding($str);
    if ($detected === FALSE) {
        return mb_convert_encoding($str, 'UTF-8');
    }

    return mb_convert_encoding($str, 'UTF-8', $detected);
}
/**
 * Discover result URLs for a search keyword.
 *
 * When both a Google Custom Search API key and engine id ("cx") are configured
 * in the plugin settings, Google is paged through first. If fewer than
 * $max_count links were collected, one page of the Bing RSS search feed is
 * fetched (retried once after a 2 second pause) and its links are appended.
 *
 * The returned list is de-duplicated but is NOT truncated to $max_count.
 *
 * @param string $keyword   Search phrase.
 * @param int    $max_count Number of links the caller would like to get.
 * @return array|false List of discovered URLs, or false when Bing yielded
 *                     nothing and no link had been collected from Google.
 */
function crawlomatic_discover_links( $keyword, $max_count)
{
    $ret_me = array();
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    $page_number = 0;
    $result_number = 10;

    $google_key = isset($crawlomatic_Main_Settings['google_search_api']) ? trim($crawlomatic_Main_Settings['google_search_api']) : '';
    $google_cx  = isset($crawlomatic_Main_Settings['google_search_cx']) ? trim($crawlomatic_Main_Settings['google_search_cx']) : '';

    if ($google_key != '' && $google_cx != '')
    {
        while (count($ret_me) < $max_count)
        {
            $first = ($page_number == 0) ? 0 : ($page_number * $result_number) + 1;
            if ($first > 91)
            {
                // Stop paging once the start index would go past 91.
                break;
            }
            $feed_uri = add_query_arg( array(
                'q' => urlencode( $keyword ),
                'cx' => $google_cx,
                'key' => $google_key,
                'num' => $result_number,
                'start' => $first
            ), 'https://www.googleapis.com/customsearch/v1' );
            $responsexxx = crawlomatic_get_web_page_from_search($feed_uri, '');
            if ($responsexxx === FALSE)
            {
                crawlomatic_log_to_file('Failed to discover Google API links for: ' . $feed_uri);
                break;
            }
            // json_decode() signals failure with null (never false), and a
            // usable response must be a JSON object.
            $json_resp = json_decode($responsexxx);
            if (!is_object($json_resp))
            {
                crawlomatic_log_to_file('Failed to decode Google API links for: ' . $feed_uri);
                break;
            }
            if (!isset($json_resp->items) || !is_array($json_resp->items))
            {
                if ($detailed_logging) {
                    crawlomatic_log_to_file('Could not find any links from Google: ' . print_r($json_resp, true));
                }
                break;
            }
            foreach ($json_resp->items as $jitem)
            {
                // Skip malformed entries instead of reading a missing property.
                if (isset($jitem->link) && is_string($jitem->link) && !in_array($jitem->link, $ret_me, true))
                {
                    $ret_me[] = $jitem->link;
                }
            }
            $page_number++;
        }
        $ret_me = array_unique($ret_me);
    }

    if (count($ret_me) > 0 && $detailed_logging)
    {
        crawlomatic_log_to_file('Discovered links from Google API Search ' . print_r($ret_me, true));
    }
    if (count($ret_me) >= $max_count)
    {
        return $ret_me;
    }

    // Bing continues from the offset implied by the Google pages already consumed.
    $first = intval($page_number) * $result_number;
    $feed_uri = add_query_arg( array(
        'q' => urlencode( $keyword ),
        'format' => 'rss',
        'first' => $first
    ), 'https://www.bing.com/search' );
    // Fixed cookie sent with every Bing request.
    $bing_cookie = 'MUID=21CFA0EFAA7B6B5011C4AF1AAB3F6AB4;SRCHUSR=DOB=20220105&T=1649962921000&TPC=1649962921000;_SS=SID=06F809EE2B476AA61076187B2ABD6B1B;_HPVN=CS=eyJQbiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiUCJ9LCJTYyI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiSCJ9LCJReiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiVCJ9LCJBcCI6dHJ1ZSwiTXV0ZSI6dHJ1ZSwiTGFkIjoiMjAyMi0wMS0wNVQwMDowMDowMFoiLCJJb3RkIjowLCJHd2IiOjAsIkRmdCI6bnVsbCwiTXZzIjowLCJGbHQiOjAsIkltcCI6Mn0=;_EDGE_S=SID=06F809EE2B476AA61076187B2ABD6B1B;SRCHHPGUSR=HV=1649962932&SRCHLANG=en&BRW=XW&BRH=M&CW=1920&CH=979&SW=1920&SH=1080&DPR=1&UTC=180&DM=0&WTS=63785559721&PV=10.0.0;SUID=A;MUIDB=21CFA0EFAA7B6B5011C4AF1AAB3F6AB4;ANON=A=F47E0C93389C62B345C6E15EFFFFFFFF&E=1a82&W=1;MSCC=1;SRCHUID=V=2&GUID=7D2B384C18484543ACDB8EC5C0FB47FB&dmnchg=1;SRCHD=AF=NOFORM';

    libxml_use_internal_errors(true);
    // Fetches the feed once and returns its channel items as an array, or null
    // when the request failed or the feed holds no items.
    $fetch_items = function () use ($feed_uri, $bing_cookie) {
        $raw = crawlomatic_get_web_page_from_search($feed_uri, $bing_cookie);
        if ($raw === FALSE)
        {
            crawlomatic_log_to_file('Failed to discover Bing links for: ' . $feed_uri);
            return null;
        }
        // Round-trip through JSON to turn the SimpleXML tree into plain arrays.
        $parsed = json_decode( json_encode( simplexml_load_string( $raw ) ), true );
        if ( empty( $parsed ) || ! isset( $parsed['channel']['item'] ) || empty( $parsed['channel']['item'] ) || ! is_array( $parsed['channel']['item'] ) )
        {
            return null;
        }
        return $parsed['channel']['item'];
    };

    $links = $fetch_items();
    if ($links === null)
    {
        sleep(2);
        $links = $fetch_items();
    }
    if ($links === null)
    {
        if (count($ret_me) == 0)
        {
            $message2 = 'Could not find any links from Bing: ' . $feed_uri;
            crawlomatic_log_to_file( $message2 );
            return false;
        }
        // Bing gave nothing, but Google already produced some links.
        return $ret_me;
    }

    if ($detailed_logging) {
        crawlomatic_log_to_file('Discovered links from Bing Search ' . print_r($links, true));
    }

    // A feed with a single <item> decodes to one associative array, a feed
    // with several decodes to a list of such arrays.
    $candidates = array();
    if (isset($links['link']))
    {
        $candidates[] = $links['link'];
    }
    else
    {
        foreach ($links as $lk)
        {
            if (is_string($lk))
            {
                // NOTE(review): bare string entries are accepted as links, as
                // before; confirm which feed shape produces them.
                $candidates[] = $lk;
            }
            elseif (is_array($lk) && isset($lk['link']))
            {
                $candidates[] = $lk['link'];
            }
        }
    }
    foreach ($candidates as $candidate)
    {
        // Empty XML elements decode to arrays; only real, non-empty strings are URLs.
        if (is_string($candidate) && $candidate !== '' && !in_array($candidate, $ret_me, true))
        {
            $ret_me[] = $candidate;
        }
    }
    return array_unique($ret_me);
}
/**
 * Download a URL with cURL, using a random user agent and the proxy settings
 * stored in the plugin options.
 *
 * If cURL cannot be initialised or the transfer fails, and allow_url_fopen is
 * enabled, the URL is fetched through WP_Filesystem instead.
 *
 * @param string $url           Absolute URL to fetch; must pass FILTER_VALIDATE_URL.
 * @param string $custom_cookie Optional raw cookie string. When given, it is sent
 *                              with the request and the shared cookie jar file
 *                              is not used.
 * @return string|false Response body, or false when nothing could be fetched
 *                      (cURL missing, invalid URL, or every attempt failed).
 */
function crawlomatic_get_web_page_from_search($url, $custom_cookie = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $user_agent = crawlomatic_get_random_user_agent();
    if (!function_exists('curl_version') || !filter_var($url, FILTER_VALIDATE_URL))
    {
        return false;
    }

    // Last-resort download through WP_Filesystem; false when allow_url_fopen is off.
    $fallback_fetch = function () use ($url) {
        $allowUrlFopen = preg_match('/1|yes|on|true/i', (string) ini_get('allow_url_fopen'));
        if (!$allowUrlFopen) {
            return false;
        }
        global $wp_filesystem;
        if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
            include_once(ABSPATH . 'wp-admin/includes/file.php');
            $creds = request_filesystem_credentials( site_url() );
            wp_filesystem($creds);
        }
        return $wp_filesystem->get_contents($url);
    };

    $ch = curl_init();
    if ($ch === FALSE) {
        crawlomatic_log_to_file('curl not inited: ' . $url);
        // Never fall through to curl_setopt() with an invalid handle.
        return $fallback_fetch();
    }

    $headers = array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: en-US,en;q=0.5',
        'Connection: keep-alive',
        'Upgrade-Insecure-Requests: 1',
    );
    if ($custom_cookie != '')
    {
        $headers[] = 'Cookie: ' . $custom_cookie;
        curl_setopt($ch, CURLOPT_COOKIE , $custom_cookie);
    }

    $options = array(
        CURLOPT_USERAGENT => $user_agent,
        CURLOPT_ENCODING => 'gzip, deflate',
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_MAXREDIRS => 10,
        // NOTE(review): certificate verification is switched off here, so the
        // peer is not authenticated; confirm this is still required.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_VERBOSE => true,
        CURLOPT_URL => $url,
        CURLOPT_HTTPHEADER => $headers
    );
    if ($custom_cookie == '')
    {
        // The shared cookie jar is only used when no explicit cookie was supplied.
        $options[CURLOPT_COOKIEJAR] = get_temp_dir() . 'crawlomaticcookie.txt';
        $options[CURLOPT_COOKIEFILE] = get_temp_dir() . 'crawlomaticcookie.txt';
    }
    if (isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled') {
        $options[CURLOPT_PROXY] = $crawlomatic_Main_Settings['proxy_url'];
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
            $options[CURLOPT_PROXYUSERPWD] = $crawlomatic_Main_Settings['proxy_auth'];
        }
    }
    curl_setopt_array($ch, $options);

    $content = curl_exec($ch);
    if ($content === false)
    {
        crawlomatic_log_to_file('Error occured in curl: ' . curl_error($ch) . ', url: ' . $url);
        // Release the handle before trying the fallback so it is not leaked.
        curl_close($ch);
        return $fallback_fetch();
    }
    curl_close($ch);
    return $content;
}
/**
 * Merge one parsed query-string fragment (as produced by parse_str()) into
 * $target, descending into nested arrays.
 *
 * Only the first key of $var is looked at. For root-level calls, the first two
 * scalar values whose key compares equal to 0 are stored as
 * '_crawlomaticr_nonce' and '_wp_http_referer' (tracked by a counter that
 * persists across calls); later such values are appended to $target.
 *
 * @param array|null $target Destination array, modified in place.
 * @param array      $var    Single-entry array to merge; empty input is ignored.
 * @param bool       $root   True for top-level calls, false while recursing.
 * @return void
 */
function crawlomatic_assign_var(&$target, $var, $root = false) {
    static $cnt = 0;

    // An empty fragment (e.g. from a stray "&") has no key to merge; skipping
    // it avoids an undefined-index read and a bogus null nonce.
    if (!is_array($var) || count($var) === 0) {
        return;
    }

    $key = key($var);
    if (is_array($var[$key])) {
        crawlomatic_assign_var($target[$key], $var[$key], false);
        return;
    }

    // NOTE(review): loose comparison kept on purpose. A non-numeric string key
    // equals 0 on PHP < 8 but not on PHP 8+, so this branch is version
    // dependent; confirm the intended behaviour before tightening it.
    if ($key == 0) {
        if ($cnt == 0 && $root == true) {
            $target['_crawlomaticr_nonce'] = $var[$key];
            $cnt++;
        } elseif ($cnt == 1 && $root == true) {
            $target['_wp_http_referer'] = $var[$key];
            $cnt++;
        } else {
            $target[] = $var[$key];
        }
    } else {
        $target[$key] = $var[$key];
    }
}
// Basename of this plugin file, used below to build per-plugin hook names.
$plugin = plugin_basename(__FILE__);
if(is_admin())
{
    // A form may arrive packed into the single field
    // "coderevolution_max_input_var_data" (presumably to stay below PHP's
    // max_input_vars limit - confirm against the admin JS). Unpack it into $_POST.
    if(isset($_SERVER["REQUEST_METHOD"]) && $_SERVER["REQUEST_METHOD"]==="POST" && !empty($_POST["coderevolution_max_input_var_data"])) {
        $vars = explode("&", $_POST["coderevolution_max_input_var_data"]);
        $coderevolution_max_input_var_data = array();
        foreach($vars as $var) {
            parse_str($var, $variable);
            crawlomatic_assign_var($_POST, $variable, true);
        }
        unset($_POST["coderevolution_max_input_var_data"]);
    }
    if(!is_multisite() || is_main_site())
    {
        $plugin_slug = explode('/', $plugin);
        $plugin_slug = $plugin_slug[0];

        // --- License registration -------------------------------------------
        // NOTE(review): no nonce or capability check is visible in this block
        // before the registration / revoke requests are acted upon.
        if(isset($_POST[$plugin_slug . '_register']) && isset($_POST[$plugin_slug. '_register_code']) && trim($_POST[$plugin_slug . '_register_code']) != '')
        {
            update_option('coderevolution_settings_changed', 1);
            // A purchase code is expected to be 36 characters long and contain dashes.
            if(strlen(trim($_POST[$plugin_slug . '_register_code'])) != 36 || strstr($_POST[$plugin_slug . '_register_code'], '-') == false)
            {
                crawlomatic_log_to_file('Invalid registration code submitted: ' . $_POST[$plugin_slug . '_register_code']);
            }
            else
            {
                $ch = curl_init('https://wpinitiate.com/verify-purchase/purchase.php');
                if($ch !== false)
                {
                    $data = array();
                    $data['code'] = trim($_POST[$plugin_slug . '_register_code']);
                    $data['siteURL'] = get_bloginfo('url');
                    $data['siteName'] = get_bloginfo('name');
                    $data['siteEmail'] = get_bloginfo('admin_email');
                    $fdata = "";
                    foreach ($data as $key => $val) {
                        $fdata .= "$key=" . urlencode(trim($val)) . "&";
                    }
                    curl_setopt($ch, CURLOPT_POST, 1);
                    curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
                    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
                    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
                    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
                    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
                    $result = curl_exec($ch);
                    if($result === false)
                    {
                        crawlomatic_log_to_file('Failed to get verification response: ' . curl_error($ch));
                    }
                    else
                    {
                        $rj = json_decode($result, true);
                        if(isset($rj['error']))
                        {
                            update_option('coderevolution_settings_changed', $rj['error']);
                        }
                        elseif(isset($rj['item_name']))
                        {
                            $rj['code'] = $_POST[$plugin_slug . '_register_code'];
                            // Only accept the response when it names one of the known item ids;
                            // a response without 'item_id' is treated as invalid.
                            if(isset($rj['item_id']) && ($rj['item_id'] == '20476010' || $rj['item_id'] == '13371337' || $rj['item_id'] == '19200046'))
                            {
                                update_option($plugin_slug . '_registration', $rj);
                                update_option('coderevolution_settings_changed', 2);
                            }
                            else
                            {
                                crawlomatic_log_to_file('Invalid response from purchase code verification (are you sure you inputed the right purchase code?): ' . print_r($rj, true));
                            }
                        }
                        else
                        {
                            crawlomatic_log_to_file('Invalid json from purchase code verification: ' . print_r($result, true));
                        }
                    }
                    curl_close($ch);
                }
                else
                {
                    crawlomatic_log_to_file('Failed to init curl when trying to make purchase verification.');
                }
            }
        }

        // --- License revocation ---------------------------------------------
        if(isset($_POST[$plugin_slug . '_revoke_license']) && trim($_POST[$plugin_slug . '_revoke_license']) != '')
        {
            $ch = curl_init('https://wpinitiate.com/verify-purchase/revoke.php');
            if($ch !== false)
            {
                $data = array();
                $data['siteURL'] = get_bloginfo('url');
                $fdata = "";
                foreach ($data as $key => $val) {
                    $fdata .= "$key=" . urlencode(trim($val)) . "&";
                }
                curl_setopt($ch, CURLOPT_POST, 1);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
                curl_setopt($ch, CURLOPT_TIMEOUT, 60);
                curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
                $result = curl_exec($ch);
                if($result === false)
                {
                    crawlomatic_log_to_file('Failed to revoke verification response: ' . curl_error($ch));
                }
                // The local registration is cleared whether or not the remote call succeeded.
                update_option($plugin_slug . '_registration', false);
                // Release the handle (it was previously left open on this path).
                curl_close($ch);
            }
            else
            {
                crawlomatic_log_to_file('Failed to init curl to revoke verification response.');
            }
        }

        // --- Update checker vs. "not registered" notice -----------------------
        $uoptions = get_option($plugin_slug . '_registration', array());
        if(isset($uoptions['item_id']) && isset($uoptions['item_name']) && isset($uoptions['created_at']) && isset($uoptions['buyer']) && isset($uoptions['licence']) && isset($uoptions['supported_until']))
        {
            require "update-checker/plugin-update-checker.php";
            $fwdu3dcarPUC = YahnisElsts\PluginUpdateChecker5\PucFactory::buildUpdateChecker("https://wpinitiate.com/auto-update/?action=get_metadata&slug=crawlomatic-multipage-scraper-post-generator", __FILE__, "crawlomatic-multipage-scraper-post-generator");
        }
        else
        {
            // Unregistered: show a reminder row under the plugin and an activation link.
            add_action("after_plugin_row_{$plugin}", function( $plugin_file, $plugin_data, $status ) {
                $plugin_url = 'https://codecanyon.net/item/crawlomatic-multisite-scraper-post-generator-plugin-for-wordpress/20476010';
                echo '<tr class="active"><td> </td><td colspan="2"><p class="cr_auto_update">';
                echo sprintf( wp_kses( __( 'The plugin is not registered. Automatic updating is disabled. Please purchase a license for it from <a href="%s" target="_blank">here</a> and register the plugin from the \'Main Settings\' menu using your purchase code. <a href="%s" target="_blank">How I find my purchase code?', 'crawlomatic-multipage-scraper-post-generator'), array( 'a' => array( 'href' => array(), 'target' => array() ) ) ), esc_url( 'https://1.envato.market/c/1264868/275988/4415?u=' . urlencode($plugin_url)), esc_url('//www.youtube.com/watch?v=NElJ5t_Wd48') );
                echo '</a></p> </td></tr>';
            }, 10, 3 );
            add_action('admin_enqueue_scripts', 'crawlomatic_admin_enqueue_all');
            add_filter("plugin_action_links_$plugin", 'crawlomatic_add_activation_link');
        }
    }

    // --- Admin hooks and admin-only includes ----------------------------------
    add_action('admin_init', 'crawlomatic_register_mysettings');
    add_action('add_meta_boxes', 'crawlomatic_add_meta_box');
    add_filter("plugin_action_links_$plugin", 'crawlomatic_add_settings_link');
    add_filter("plugin_action_links_$plugin", 'crawlomatic_add_rating_link');
    add_action('admin_menu', 'crawlomatic_register_my_custom_menu_page');
    add_action('network_admin_menu', 'crawlomatic_register_my_custom_menu_page');
    add_filter("plugin_action_links_$plugin", 'crawlomatic_add_support_link');
    require(dirname(__FILE__) . "/res/crawlomatic-main.php");
    require(dirname(__FILE__) . "/res/crawlomatic-rules-list.php");
    require(dirname(__FILE__) . "/res/crawlomatic-logs.php");
    require(dirname(__FILE__) . "/res/crawlomatic-helper.php");
    require(dirname(__FILE__) . "/res/crawlomatic-offer.php");

    // Promote HeadlessBrowserAPI on the plugin's own pages (or while a dismiss
    // request is being handled) as long as no API key has been configured.
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if ((isset($_GET['crawlomatic_dismiss']) || (isset($_GET['page']) && ($_GET['page'] == 'crawlomatic_admin_settings' || $_GET['page'] == 'crawlomatic_items_panel'))) && (!isset($crawlomatic_Main_Settings['headlessbrowserapi_key']) || trim($crawlomatic_Main_Settings['headlessbrowserapi_key']) == ''))
    {
        require_once( plugin_dir_path(__FILE__) . 'admin-notice.php' );
        $notices = Crawlomatic_Admin_Notices::get_instance();
        $notices->success( esc_html__('Latest Update for Crawlomatic!', 'crawlomatic-multipage-scraper-post-generator'), sprintf( wp_kses( __( '<b>Scrape JavaScript rendered content</b> from web pages using the new <b><a href="%s" target="_blank">HeadlessBrowserAPI</a></b>! Check more info, <a href="%s" target="_blank">here</a>.<br/>It will handle scraping pages for you using any of the following headless browsers: Puppeteer, Tor or PhantomJS, so you can get the JavaScript rendered HTML from any web page with a simple API call (no need to install anything on your server)!<br><br><b>Bonus tip:</b> If you select to use the Tor browser in the API to scrape content, Dark Web (.onion) links can also be scraped! Also, this will automatically use a random proxy to access sites, so IP based access limitations will not be an issue any more.', 'crawlomatic-multipage-scraper-post-generator'), array( 'a' => array( 'href' => array(), 'target' => array() ), 'b' => array( ), 'br' => array( ) ) ), esc_url( 'https://headlessbrowserapi.com/' ), esc_url( 'https://headlessbrowserapi.com/about/' ) ), 'headlessbrowserapi-notice' );
    }
}
/**
 * Enqueue the inline CSS that styles the "plugin is not registered" row
 * (the .cr_auto_update paragraph).
 *
 * @return void
 */
function crawlomatic_admin_enqueue_all()
{
    $handle = 'crawlomatic-plugin-reg-style';

    // A style registered without a source exists only to carry inline CSS.
    wp_register_style( $handle, false );
    wp_enqueue_style( $handle );
    wp_add_inline_style(
        $handle,
        '.cr_auto_update{background-color:#fff8e5;margin:5px 20px 15px 20px;border-left:4px solid #fff;padding:12px 12px 12px 12px !important;border-left-color:#ffb900;}'
    );
}
/**
 * Append an "Activate Plugin License" link to the plugin's action links.
 *
 * @param array $links Existing action links.
 * @return array The links with the activation link added at the end.
 */
function crawlomatic_add_activation_link($links)
{
    $label = esc_html__('Activate Plugin License', 'crawlomatic-multipage-scraper-post-generator');
    array_push($links, sprintf('<a href="admin.php?page=crawlomatic_admin_settings">%s</a>', $label));

    return $links;
}
use \Eventviva\ImageResize;
use vipnytt\SitemapParser;
use vipnytt\SitemapParser\Exceptions\SitemapParserException;
/**
 * Register the plugin's top-level admin menu and its sub-pages.
 *
 * "Main Settings" is always registered. The remaining pages are only added
 * when the 'crawlomatic_enabled' setting equals 'on'. Each page gets load-time
 * callbacks that queue its scripts.
 *
 * @return void
 */
function crawlomatic_register_my_custom_menu_page()
{
    $parent     = 'crawlomatic_admin_settings';
    $capability = 'manage_options';

    add_menu_page('Crawlomatic Multipage Scraper', 'Crawlomatic Multipage Scraper', $capability, $parent, 'crawlomatic_admin_settings', plugins_url('images/icon.png', __FILE__));

    $main_title = esc_html__("Main Settings", 'crawlomatic-multipage-scraper-post-generator');
    $main = add_submenu_page($parent, $main_title, $main_title, $capability, 'crawlomatic_admin_settings');
    add_action( 'load-' . $main, 'crawlomatic_load_all_admin_js' );
    add_action( 'load-' . $main, 'crawlomatic_load_main_admin_js' );

    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $plugin_enabled = isset($crawlomatic_Main_Settings['crawlomatic_enabled']) && $crawlomatic_Main_Settings['crawlomatic_enabled'] == 'on';
    if (!$plugin_enabled) {
        return;
    }

    $crawl_title = esc_html__('Web Crawl to Posts', 'crawlomatic-multipage-scraper-post-generator');
    $crawl = add_submenu_page($parent, $crawl_title, $crawl_title, $capability, 'crawlomatic_items_panel', 'crawlomatic_items_panel');
    add_action( 'load-' . $crawl, 'crawlomatic_load_admin_js' );
    add_action( 'load-' . $crawl, 'crawlomatic_load_all_admin_js' );

    $help_title = esc_html__('Crawling Helper', 'crawlomatic-multipage-scraper-post-generator');
    $help = add_submenu_page($parent, $help_title, $help_title, $capability, 'crawlomatic_helper', 'crawlomatic_helper');
    add_action( 'load-' . $help, 'crawlomatic_load_all_admin_js' );
    add_action( 'load-' . $help, 'crawlomatic_load_helper_js' );

    $tips_title = esc_html__('Tips & Tricks', 'crawlomatic-multipage-scraper-post-generator');
    $tips = add_submenu_page($parent, $tips_title, $tips_title, $capability, 'crawlomatic_recommendations', 'crawlomatic_recommendations');
    add_action( 'load-' . $tips, 'crawlomatic_load_all_admin_js' );

    $log_title = esc_html__("Activity & Logging", 'crawlomatic-multipage-scraper-post-generator');
    $log = add_submenu_page($parent, $log_title, $log_title, $capability, 'crawlomatic_logs', 'crawlomatic_logs');
    add_action( 'load-' . $log, 'crawlomatic_load_all_admin_js' );
}
/**
 * Page-load callback: schedule crawlomatic_enqueue_admin_js() to run on
 * 'admin_enqueue_scripts'.
 *
 * @return void
 */
function crawlomatic_load_admin_js()
{
    add_action( 'admin_enqueue_scripts', 'crawlomatic_enqueue_admin_js' );
}
/**
 * Enqueue the footer script and the rules stylesheet, and expose a few
 * settings to the script as the JS object "mycustomsettings".
 *
 * @return void
 */
function crawlomatic_enqueue_admin_js()
{
    $script_handle = 'crawlomatic-footer-script';
    wp_enqueue_script($script_handle, plugins_url('scripts/footer.js', __FILE__), array('jquery'), false, true);

    // ini_get() yields false for an unknown directive; anything non-numeric
    // is replaced by a very large stand-in limit.
    $cr_miv = ini_get('max_input_vars');
    if (!is_numeric($cr_miv)) {
        $cr_miv = '9999999';
    }

    wp_localize_script($script_handle, 'mycustomsettings', array(
        'max_input_vars' => $cr_miv,
        'plugin_dir_url' => plugin_dir_url(__FILE__),
        'ajaxurl' => admin_url('admin-ajax.php')
    ));

    $style_handle = 'crawlomatic-rules-style';
    wp_register_style($style_handle, plugins_url('styles/crawlomatic-rules.css', __FILE__), false, '1.0.0');
    wp_enqueue_style($style_handle);
}
/**
 * Page-load callback: schedule crawlomatic_admin_load_helper() to run on
 * 'admin_enqueue_scripts'.
 *
 * @return void
 */
function crawlomatic_load_helper_js()
{
    add_action( 'admin_enqueue_scripts', 'crawlomatic_admin_load_helper' );
}
/**
 * Enqueue scripts/helper.js (depends on jQuery, loaded in the footer).
 *
 * @return void
 */
function crawlomatic_admin_load_helper()
{
    $script_url = plugins_url( 'scripts/helper.js', __FILE__ );
    wp_enqueue_script( 'crawlomatic-helper-script', $script_url, array( 'jquery' ), false, true );
}
/**
 * Page-load callback: schedule crawlomatic_enqueue_main_admin_js() to run on
 * 'admin_enqueue_scripts'.
 *
 * @return void
 */
function crawlomatic_load_main_admin_js()
{
    add_action( 'admin_enqueue_scripts', 'crawlomatic_enqueue_main_admin_js' );
}
/**
 * Enqueue scripts/main.js and hand it the stored 'best_user' and
 * 'best_password' settings (empty strings when unset) as the JS object
 * "mycustommainsettings".
 *
 * @return void
 */
function crawlomatic_enqueue_main_admin_js()
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    wp_enqueue_script('crawlomatic-main-script', plugins_url('scripts/main.js', __FILE__), array('jquery'));

    $best_user = isset($crawlomatic_Main_Settings['best_user'])
        ? $crawlomatic_Main_Settings['best_user']
        : '';
    $best_password = isset($crawlomatic_Main_Settings['best_password'])
        ? $crawlomatic_Main_Settings['best_password']
        : '';

    wp_localize_script('crawlomatic-main-script', 'mycustommainsettings', array(
        'best_user' => $best_user,
        'best_password' => $best_password
    ));
}
/**
 * Page-load callback: schedule crawlomatic_admin_load_files() to run on
 * 'admin_enqueue_scripts'.
 *
 * @return void
 */
function crawlomatic_load_all_admin_js()
{
    add_action( 'admin_enqueue_scripts', 'crawlomatic_admin_load_files' );
}
/**
 * Tell whether the current request was made over HTTPS.
 *
 * The request counts as secure when $_SERVER['HTTPS'] is set to something
 * other than 'off', or when the server port is 443. Missing $_SERVER entries
 * (e.g. when running from the command line) are treated as "not secure"
 * instead of triggering an undefined-index warning.
 *
 * @return bool True for HTTPS requests, false otherwise.
 */
function crawlomatic_isSecure() {
    $https_flag_on = !empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off';
    $on_port_443   = isset($_SERVER['SERVER_PORT']) && $_SERVER['SERVER_PORT'] == 443;

    return $https_flag_on || $on_port_443;
}
/**
 * Append a five-star "Rate" link (pointing at the CodeCanyon downloads page)
 * to the plugin's action links.
 *
 * @param array $links Existing action links.
 * @return array The links with the rating link added at the end.
 */
function crawlomatic_add_rating_link($links)
{
    // One star icon; the link shows five identical copies of it.
    $star = '<svg xmlns="http://www.w3.org/2000/svg" width="15" height="15" viewBox="0 0 24 24" fill="#ffb900" stroke="#ffb900" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-star"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"></polygon></svg>';

    // The line break inside this literal is part of the emitted markup.
    $rating_link = '<a href="//codecanyon.net/downloads" target="_blank" title="Rate">
<i class="wdi-rate-stars">' . str_repeat($star, 5) . '</i></a>';

    array_push($links, $rating_link);
    return $links;
}
/**
 * Append a "Support" link (the vendor's knowledge base) to the plugin's
 * action links.
 *
 * @param array $links Existing action links.
 * @return array The links with the support link added at the end.
 */
function crawlomatic_add_support_link($links)
{
    $label = esc_html__('Support', 'crawlomatic-multipage-scraper-post-generator');
    array_push($links, sprintf('<a href="//coderevolution.ro/knowledge-base/" target="_blank">%s</a>', $label));

    return $links;
}
/**
 * Append a "Settings" link (the plugin's main settings page) to the plugin's
 * action links.
 *
 * @param array $links Existing action links.
 * @return array The links with the settings link added at the end.
 */
function crawlomatic_add_settings_link($links)
{
    $label = esc_html__('Settings', 'crawlomatic-multipage-scraper-post-generator');
    array_push($links, sprintf('<a href="admin.php?page=crawlomatic_admin_settings">%s</a>', $label));

    return $links;
}
/**
 * Register the "Crawlomatic Auto Generated Post Information" meta box on
 * every post type, provided both the plugin and its meta box option are
 * switched on in the settings.
 *
 * @return void
 */
function crawlomatic_add_meta_box()
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);

    $plugin_enabled = isset($crawlomatic_Main_Settings['crawlomatic_enabled'])
        && $crawlomatic_Main_Settings['crawlomatic_enabled'] === 'on';
    if (!$plugin_enabled) {
        return;
    }
    $metabox_enabled = isset($crawlomatic_Main_Settings['enable_metabox'])
        && $crawlomatic_Main_Settings['enable_metabox'] == 'on';
    if (!$metabox_enabled) {
        return;
    }

    $post_types = get_post_types( '', 'names' );
    foreach ( $post_types as $post_type ) {
        add_meta_box(
            'crawlomatic_meta_box_function_add',
            esc_html__('Crawlomatic Auto Generated Post Information', 'crawlomatic-multipage-scraper-post-generator'),
            'crawlomatic_meta_box_function',
            $post_type,
            'advanced',
            'default',
            array('__back_compat_meta_box' => true)
        );
    }
}
/**
 * Build a DateTimeZone object for the site's configured timezone.
 *
 * Resolution order:
 *  1. the 'timezone_string' option, when set;
 *  2. otherwise the 'gmt_offset' option: whole-hour offsets map to the
 *     matching "Etc/GMT" zone (whose sign is inverted by convention), and
 *     fractional offsets such as 5.5 map to a fixed "+05:30" style zone
 *     instead of silently falling back to UTC;
 *  3. otherwise UTC.
 *
 * @return DateTimeZone
 */
function crawlomatic_get_blog_timezone() {
    $tzstring = get_option( 'timezone_string' );

    if ( empty( $tzstring ) ) {
        // Cast first so that '' / false / numeric strings are all handled uniformly.
        $offset = (float) get_option( 'gmt_offset' );
        if ( $offset != 0 ) {
            if ( floor( $offset ) == $offset ) {
                // "Etc/GMT-5" means UTC+5: the sign is deliberately flipped.
                $tzstring = 'Etc/GMT' . ( $offset > 0 ? '-' : '+' ) . (int) abs( $offset );
            } else {
                $total_minutes = (int) round( abs( $offset ) * 60 );
                $tzstring = sprintf(
                    '%s%02d:%02d',
                    $offset < 0 ? '-' : '+',
                    (int) floor( $total_minutes / 60 ),
                    $total_minutes % 60
                );
            }
        }
    }

    if ( empty( $tzstring ) ) {
        $tzstring = 'UTC';
    }

    return new DateTimeZone( $tzstring );
}
/**
 * Rewrite a title and its content by swapping words for random synonyms.
 *
 * Synonym groups are read from res/synonyms.dat, one group per line with the
 * alternatives separated by '|'. Every occurrence of a word (as written, and
 * again with its first letter upper-cased) is replaced by a randomly chosen
 * member of its group. Words listed in the comma separated 'no_spin' setting
 * are left alone. Matching is a plain substring match, not a whole-word match.
 *
 * Title and content are processed as one text joined by a marker and split
 * apart again afterwards, so the returned title keeps a trailing space and the
 * returned content a leading space.
 *
 * @param string $title   Post title.
 * @param string $content Post content.
 * @return array|false array($title, $content) after spinning, or false when
 *                     the separator marker can no longer be found.
 */
function crawlomatic_builtin_spin_text($title, $content)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    $titleSeparator = '[19459000]';
    $text = $title . ' ' . $titleSeparator . ' ' . $content;

    $no_spin_words = array();
    if (isset($crawlomatic_Main_Settings['no_spin']) && $crawlomatic_Main_Settings['no_spin'] != '') {
        $no_spin_words = array_map('trim', explode(',', $crawlomatic_Main_Settings['no_spin']));
    }

    // Read the synonym groups without their line endings, so the last word of
    // each line is matched like every other word.
    $synonyms_file = dirname(__FILE__) . '/res/synonyms.dat';
    $file = is_readable($synonyms_file) ? file($synonyms_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) : false;
    if ($file === false) {
        // Without synonyms the text is simply returned unspun.
        if ($detailed_logging) {
            crawlomatic_log_to_file('Failed to read synonyms file: ' . $synonyms_file);
        }
        $file = array();
    }

    foreach ($file as $line) {
        // Drop surrounding whitespace and empty entries, so a replacement can
        // never be an empty string that would delete the matched word.
        $synonyms = array_values(array_filter(array_map('trim', explode('|', $line)), 'strlen'));
        if (count($synonyms) === 0) {
            continue;
        }
        foreach ($synonyms as $word) {
            if (in_array($word, $no_spin_words, true)) {
                continue;
            }

            // Pass 1: the word as written. preg_quote() keeps regex
            // metacharacters in the synonym file from breaking the pattern.
            $replacement = $synonyms[array_rand($synonyms, 1)];
            $text1 = preg_replace('/' . preg_quote($word, '/') . '/u', addcslashes($replacement, '\\$'), $text);
            if ($text1 !== null) {
                $text = $text1;
            }

            // Pass 2: the capitalised form, replaced by a capitalised synonym.
            $uword = ucfirst($word);
            $replacement = ucfirst($synonyms[array_rand($synonyms, 1)]);
            $text1 = preg_replace('/' . preg_quote($uword, '/') . '/u', addcslashes($replacement, '\\$'), $text);
            if ($text1 !== null) {
                $text = $text1;
            }
        }
    }

    if (stristr($text, $titleSeparator) === false) {
        if ($detailed_logging) {
            crawlomatic_log_to_file('Failed to parse spinned content, separator not found');
        }
        return false;
    }

    // Split on the first marker only, so content that happens to contain the
    // marker text is not cut short.
    $contents = explode($titleSeparator, $text, 2);
    return array(
        $contents[0],
        $contents[1]
    );
}
add_filter('cron_schedules', 'crawlomatic_add_cron_schedule');
/**
 * Add the plugin's recurrence intervals to the WP-Cron schedule list.
 *
 * Entries with the same key that are already present in $schedules are
 * overwritten.
 *
 * @param array $schedules Existing cron schedules, keyed by schedule name.
 * @return array The schedules including 'crawlomatic_cron' (hourly),
 *               'minutely', 'weekly' and 'monthly' (30 days).
 */
function crawlomatic_add_cron_schedule($schedules)
{
    $plugin_schedules = array(
        'crawlomatic_cron' => array(
            'interval' => 3600,
            'display' => esc_html__('Crawlomatic Cron', 'crawlomatic-multipage-scraper-post-generator')
        ),
        'minutely' => array(
            'interval' => 60,
            'display' => esc_html__('Once A Minute', 'crawlomatic-multipage-scraper-post-generator')
        ),
        'weekly' => array(
            'interval' => 604800,
            'display' => esc_html__('Once Weekly', 'crawlomatic-multipage-scraper-post-generator')
        ),
        'monthly' => array(
            'interval' => 2592000,
            'display' => esc_html__('Once Monthly', 'crawlomatic-multipage-scraper-post-generator')
        ),
    );

    foreach ($plugin_schedules as $slug => $definition) {
        $schedules[$slug] = $definition;
    }

    return $schedules;
}
/**
 * Delete the plugin's log file (wp-content/crawlomatic_info.log) if it
 * exists, initialising WP_Filesystem first when necessary.
 *
 * @return void
 */
function crawlomatic_auto_clear_log()
{
    global $wp_filesystem;

    if ( ! ( $wp_filesystem instanceof WP_Filesystem_Base ) ) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials( site_url() );
        wp_filesystem($creds);
    }

    $log_file = WP_CONTENT_DIR . '/crawlomatic_info.log';
    if ( ! $wp_filesystem->exists($log_file) ) {
        return;
    }
    $wp_filesystem->delete($log_file);
}
add_shortcode( 'crawlomatic-display-posts', 'crawlomatic_display_posts_shortcode' );
/**
 * Handler for the [crawlomatic-display-posts] shortcode.
 *
 * Builds a WP_Query from the shortcode attributes and renders the matching
 * posts as an HTML list (ul/ol/div wrapper). The query always requires the
 * 'crawlomatic_parent_rule' meta key; the 'ruleid' attribute additionally
 * restricts it to one meta value. The 'meta_key' / 'meta_value' attributes are
 * accepted for compatibility but are not used by the query.
 *
 * Additional taxonomy filters can be passed as numbered attributes
 * (taxonomy_2 / tax_2_term / tax_2_operator / tax_2_include_children, ...),
 * combined through the optional 'tax_relation' attribute (AND|OR).
 *
 * An inline stylesheet carrying the title/excerpt colours and font sizes is
 * enqueued on every successful render.
 *
 * @param array|string $atts Raw shortcode attributes.
 * @return string|null Rendered HTML; the filtered "no posts" message when the
 *                     query is empty; null inside the admin area or when the
 *                     shortcode is disabled via 'display_posts_off'.
 */
function crawlomatic_display_posts_shortcode( $atts ) {
    if ( is_admin() ) {
        return;
    }
    // Keep the raw attributes: the numbered taxonomy attributes are not part of
    // the defaults below and are therefore only available here.
    $original_atts = $atts;
    $atts = shortcode_atts( array(
        'author'               => '',
        'category'             => '',
        'category_display'     => '',
        'category_label'       => 'Posted in: ',
        'content_class'        => 'content',
        'date_format'          => '(n/j/Y)',
        'date'                 => '',
        'date_column'          => 'post_date',
        'date_compare'         => '=',
        'date_query_before'    => '',
        'date_query_after'     => '',
        'date_query_column'    => '',
        'date_query_compare'   => '',
        'display_posts_off'    => false,
        'excerpt_length'       => false,
        'excerpt_more'         => false,
        'excerpt_more_link'    => false,
        'exclude_current'      => false,
        'id'                   => false,
        'ignore_sticky_posts'  => false,
        'image_size'           => false,
        'include_author'       => false,
        'include_content'      => false,
        'include_date'         => false,
        'include_excerpt'      => false,
        'include_link'         => true,
        'include_title'        => true,
        'meta_key'             => '',
        'meta_value'           => '',
        'no_posts_message'     => '',
        'offset'               => 0,
        'order'                => 'DESC',
        'orderby'              => 'date',
        'post_parent'          => false,
        'post_status'          => 'publish',
        'post_type'            => 'post',
        'posts_per_page'       => '10',
        'tag'                  => '',
        'tax_operator'         => 'IN',
        'tax_include_children' => true,
        'tax_term'             => false,
        'taxonomy'             => false,
        'time'                 => '',
        'title'                => '',
        'title_color'          => '#000000',
        'excerpt_color'        => '#000000',
        'link_to_source'       => '',
        'title_font_size'      => '100%',
        'excerpt_font_size'    => '100%',
        'read_more_text'       => '',
        'wrapper'              => 'ul',
        'wrapper_class'        => 'display-posts-listing',
        'wrapper_id'           => false,
        'ruleid'               => ''
    ), $atts, 'display-posts' );

    // Switched on by crawlomatic_display_posts_off() while post content is being
    // rendered below, so a nested shortcode does not recurse.
    if ( $atts['display_posts_off'] ) {
        return;
    }

    // ---- Sanitize attributes -------------------------------------------------
    $author               = sanitize_text_field( $atts['author'] );
    $ruleid               = sanitize_text_field( $atts['ruleid'] );
    $category             = sanitize_text_field( $atts['category'] );
    $category_display     = 'true' == $atts['category_display'] ? 'category' : sanitize_text_field( $atts['category_display'] );
    $category_label       = sanitize_text_field( $atts['category_label'] );
    $content_class        = array_map( 'sanitize_html_class', ( explode( ' ', $atts['content_class'] ) ) );
    $date_format          = sanitize_text_field( $atts['date_format'] );
    $date                 = sanitize_text_field( $atts['date'] );
    $date_column          = sanitize_text_field( $atts['date_column'] );
    $date_compare         = sanitize_text_field( $atts['date_compare'] );
    $date_query_before    = sanitize_text_field( $atts['date_query_before'] );
    $date_query_after     = sanitize_text_field( $atts['date_query_after'] );
    $date_query_column    = sanitize_text_field( $atts['date_query_column'] );
    $date_query_compare   = sanitize_text_field( $atts['date_query_compare'] );
    $excerpt_length       = intval( $atts['excerpt_length'] );
    $excerpt_more         = sanitize_text_field( $atts['excerpt_more'] );
    $excerpt_more_link    = filter_var( $atts['excerpt_more_link'], FILTER_VALIDATE_BOOLEAN );
    $exclude_current      = filter_var( $atts['exclude_current'], FILTER_VALIDATE_BOOLEAN );
    $id                   = $atts['id'];
    $ignore_sticky_posts  = filter_var( $atts['ignore_sticky_posts'], FILTER_VALIDATE_BOOLEAN );
    $image_size           = sanitize_key( $atts['image_size'] );
    $include_title        = filter_var( $atts['include_title'], FILTER_VALIDATE_BOOLEAN );
    $include_author       = filter_var( $atts['include_author'], FILTER_VALIDATE_BOOLEAN );
    $include_content      = filter_var( $atts['include_content'], FILTER_VALIDATE_BOOLEAN );
    $include_date         = filter_var( $atts['include_date'], FILTER_VALIDATE_BOOLEAN );
    $include_excerpt      = filter_var( $atts['include_excerpt'], FILTER_VALIDATE_BOOLEAN );
    $include_link         = filter_var( $atts['include_link'], FILTER_VALIDATE_BOOLEAN );
    $no_posts_message     = sanitize_text_field( $atts['no_posts_message'] );
    $offset               = intval( $atts['offset'] );
    $order                = sanitize_key( $atts['order'] );
    $orderby              = sanitize_key( $atts['orderby'] );
    $post_parent          = $atts['post_parent'];
    $post_status          = $atts['post_status'];
    $post_type            = sanitize_text_field( $atts['post_type'] );
    $posts_per_page       = intval( $atts['posts_per_page'] );
    $tag                  = sanitize_text_field( $atts['tag'] );
    $tax_operator         = $atts['tax_operator'];
    $tax_include_children = filter_var( $atts['tax_include_children'], FILTER_VALIDATE_BOOLEAN );
    $tax_term             = sanitize_text_field( $atts['tax_term'] );
    $taxonomy             = sanitize_key( $atts['taxonomy'] );
    $time                 = sanitize_text_field( $atts['time'] );
    $shortcode_title      = sanitize_text_field( $atts['title'] );
    $title_color          = sanitize_text_field( $atts['title_color'] );
    $excerpt_color        = sanitize_text_field( $atts['excerpt_color'] );
    $link_to_source       = sanitize_text_field( $atts['link_to_source'] );
    $excerpt_font_size    = sanitize_text_field( $atts['excerpt_font_size'] );
    $title_font_size      = sanitize_text_field( $atts['title_font_size'] );
    $read_more_text       = sanitize_text_field( $atts['read_more_text'] );
    $wrapper              = sanitize_text_field( $atts['wrapper'] );
    $wrapper_class        = array_map( 'sanitize_html_class', ( explode( ' ', $atts['wrapper_class'] ) ) );
    if ( ! empty( $wrapper_class ) ) {
        $wrapper_class = ' class="' . implode( ' ', $wrapper_class ) . '"';
    }
    $wrapper_id = sanitize_html_class( $atts['wrapper_id'] );
    if ( ! empty( $wrapper_id ) ) {
        $wrapper_id = ' id="' . esc_html( $wrapper_id ) . '"';
    }

    // ---- Base query ----------------------------------------------------------
    $args = array(
        'category_name'  => $category,
        'order'          => $order,
        'orderby'        => $orderby,
        'post_type'      => explode( ',', $post_type ),
        'posts_per_page' => $posts_per_page,
        'tag'            => $tag,
    );

    // ---- Date query ----------------------------------------------------------
    if ( ! empty( $date ) || ! empty( $time ) || ! empty( $date_query_after ) || ! empty( $date_query_before ) ) {
        $initial_date_query = $date_query_top_lvl = array();
        $valid_date_columns = array(
            'post_date', 'post_date_gmt', 'post_modified', 'post_modified_gmt',
            'comment_date', 'comment_date_gmt'
        );
        $valid_compare_ops = array( '=', '!=', '>', '>=', '<', '<=', 'IN', 'NOT IN', 'BETWEEN', 'NOT BETWEEN' );

        $dates = crawlomatic_sanitize_date_time( $date );
        if ( ! empty( $dates ) ) {
            if ( is_string( $dates ) ) {
                $timestamp = strtotime( $dates );
                $dates = array(
                    'year'  => date( 'Y', $timestamp ),
                    'month' => date( 'm', $timestamp ),
                    'day'   => date( 'd', $timestamp ),
                );
            }
            foreach ( $dates as $arg => $segment ) {
                $initial_date_query[ $arg ] = $segment;
            }
        }
        $times = crawlomatic_sanitize_date_time( $time, 'time' );
        if ( ! empty( $times ) ) {
            foreach ( $times as $arg => $segment ) {
                $initial_date_query[ $arg ] = $segment;
            }
        }
        $before = crawlomatic_sanitize_date_time( $date_query_before, 'date', true );
        if ( ! empty( $before ) ) {
            $initial_date_query['before'] = $before;
        }
        $after = crawlomatic_sanitize_date_time( $date_query_after, 'date', true );
        if ( ! empty( $after ) ) {
            $initial_date_query['after'] = $after;
        }
        if ( ! empty( $date_query_column ) && in_array( $date_query_column, $valid_date_columns ) ) {
            $initial_date_query['column'] = $date_query_column;
        }
        if ( ! empty( $date_query_compare ) && in_array( $date_query_compare, $valid_compare_ops ) ) {
            $initial_date_query['compare'] = $date_query_compare;
        }
        if ( ! empty( $date_column ) && in_array( $date_column, $valid_date_columns ) ) {
            $date_query_top_lvl['column'] = $date_column;
        }
        if ( ! empty( $date_compare ) && in_array( $date_compare, $valid_compare_ops ) ) {
            $date_query_top_lvl['compare'] = $date_compare;
        }
        if ( ! empty( $initial_date_query ) ) {
            $date_query_top_lvl[] = $initial_date_query;
        }
        $args['date_query'] = $date_query_top_lvl;
    }

    // ---- Restrict to posts generated by this plugin --------------------------
    $args['meta_key'] = 'crawlomatic_parent_rule';
    if ( $ruleid != '' ) {
        $args['meta_value'] = $ruleid;
    }
    if ( $ignore_sticky_posts ) {
        $args['ignore_sticky_posts'] = true;
    }
    if ( $id ) {
        $args['post__in'] = array_map( 'intval', explode( ',', $id ) );
    }
    if ( is_singular() && $exclude_current ) {
        $args['post__not_in'] = array( get_the_ID() );
    }
    if ( ! empty( $author ) ) {
        if ( 'current' == $author ) {
            // 'current' for a logged-out visitor adds no author constraint.
            if ( is_user_logged_in() ) {
                $args['author_name'] = wp_get_current_user()->user_login;
            }
        } else {
            $args['author_name'] = $author;
        }
    }
    if ( ! empty( $offset ) ) {
        $args['offset'] = $offset;
    }

    // ---- Post status: keep only registered statuses, minus internal ones -----
    $post_status = explode( ', ', $post_status );
    $available   = array_diff(
        get_post_stati(),
        array( 'auto-draft', 'inherit', 'request-pending', 'request-confirmed', 'request-failed', 'request-completed' )
    );
    $validated = array();
    foreach ( $post_status as $unvalidated ) {
        if ( in_array( $unvalidated, $available ) ) {
            $validated[] = $unvalidated;
        }
    }
    if ( ! empty( $validated ) ) {
        $args['post_status'] = $validated;
    }

    // ---- Taxonomy query ------------------------------------------------------
    if ( ! empty( $taxonomy ) && ! empty( $tax_term ) ) {
        if ( 'current' == $tax_term ) {
            $terms    = wp_get_post_terms( get_the_ID(), $taxonomy );
            $tax_term = array();
            foreach ( $terms as $term ) {
                $tax_term[] = $term->slug;
            }
        } else {
            $tax_term = explode( ', ', $tax_term );
        }
        if ( ! in_array( $tax_operator, array( 'IN', 'NOT IN', 'AND' ) ) ) {
            $tax_operator = 'IN';
        }
        $tax_args = array(
            'tax_query' => array(
                array(
                    'taxonomy'         => $taxonomy,
                    'field'            => 'slug',
                    'terms'            => $tax_term,
                    'operator'         => $tax_operator,
                    'include_children' => $tax_include_children,
                )
            )
        );

        // Numbered follow-up clauses: taxonomy_2 + tax_2_term, taxonomy_3 + tax_3_term, ...
        $count            = 2;
        $more_tax_queries = false;
        while (
            isset( $original_atts[ 'taxonomy_' . $count ] ) && ! empty( $original_atts[ 'taxonomy_' . $count ] ) &&
            isset( $original_atts[ 'tax_' . $count . '_term' ] ) && ! empty( $original_atts[ 'tax_' . $count . '_term' ] )
        ) {
            $more_tax_queries = true;
            $taxonomy         = sanitize_key( $original_atts[ 'taxonomy_' . $count ] );
            $terms            = explode( ', ', sanitize_text_field( $original_atts[ 'tax_' . $count . '_term' ] ) );
            $tax_operator     = isset( $original_atts[ 'tax_' . $count . '_operator' ] ) ? $original_atts[ 'tax_' . $count . '_operator' ] : 'IN';
            $tax_operator     = in_array( $tax_operator, array( 'IN', 'NOT IN', 'AND' ) ) ? $tax_operator : 'IN';
            // Read from the raw attributes: the parsed $atts never contains numbered keys.
            $tax_include_children = isset( $original_atts[ 'tax_' . $count . '_include_children' ] )
                ? filter_var( $original_atts[ 'tax_' . $count . '_include_children' ], FILTER_VALIDATE_BOOLEAN )
                : true;
            $tax_args['tax_query'][] = array(
                'taxonomy'         => $taxonomy,
                'field'            => 'slug',
                'terms'            => $terms,
                'operator'         => $tax_operator,
                'include_children' => $tax_include_children,
            );
            $count++;
        }
        if ( $more_tax_queries ) {
            $tax_relation = 'AND';
            if ( isset( $original_atts['tax_relation'] ) && in_array( $original_atts['tax_relation'], array( 'AND', 'OR' ) ) ) {
                $tax_relation = $original_atts['tax_relation'];
            }
            $args['tax_query']['relation'] = $tax_relation;
        }
        $args = array_merge_recursive( $args, $tax_args );
    }

    if ( $post_parent !== false ) {
        if ( 'current' == $post_parent ) {
            $post_parent = get_the_ID();
        }
        $args['post_parent'] = intval( $post_parent );
    }

    // ---- Wrapper markup ------------------------------------------------------
    $wrapper_options = array( 'ul', 'ol', 'div' );
    if ( ! in_array( $wrapper, $wrapper_options ) ) {
        $wrapper = 'ul';
    }
    $inner_wrapper = 'div' == $wrapper ? 'div' : 'li';

    // ---- Run the query -------------------------------------------------------
    $listing = new WP_Query( apply_filters( 'display_posts_shortcode_args', $args, $original_atts ) );
    if ( ! $listing->have_posts() ) {
        return apply_filters( 'display_posts_shortcode_no_results', wpautop( $no_posts_message ) );
    }

    $inner = '';
    wp_suspend_cache_addition( true );
    while ( $listing->have_posts() ) {
        $listing->the_post();
        global $post;
        $image = $date = $author = $excerpt = $content = '';

        // With link_to_source="yes" links point to the stored source URL when
        // the post has one, otherwise to the post permalink.
        $source_url = '';
        if ( 'yes' == $link_to_source ) {
            $source_url = get_post_meta( $post->ID, 'crawlomatic_post_url', true );
        }
        $has_source_url = ( $source_url != '' );

        // Title
        if ( $include_title && $include_link ) {
            $title_href = $has_source_url ? esc_url( $source_url ) : apply_filters( 'the_permalink', get_permalink() );
            $title      = '<a class="crawlomatic_display_title" href="' . $title_href . '"><span class="cr_display_span" >' . get_the_title() . '</span></a>';
        } elseif ( $include_title ) {
            // One class attribute only: a second one is ignored by browsers, which
            // kept the cr_display_span colour/size rules from applying.
            $title = '<span class="crawlomatic_display_title cr_display_span">' . get_the_title() . '</span>';
        } else {
            $title = '';
        }

        // Featured image
        if ( $image_size && has_post_thumbnail() && $include_link ) {
            $image_href = $has_source_url ? esc_url( $source_url ) : get_permalink();
            $image      = '<a class="crawlomatic_display_image" href="' . $image_href . '">' . get_the_post_thumbnail( get_the_ID(), $image_size ) . '</a> <br/>';
        } elseif ( $image_size && has_post_thumbnail() ) {
            $image = '<span class="crawlomatic_display_image">' . get_the_post_thumbnail( get_the_ID(), $image_size ) . '</span> <br/>';
        }

        if ( $include_date ) {
            $date = ' <span class="date">' . get_the_date( $date_format ) . '</span>';
        }
        if ( $include_author ) {
            $author = apply_filters( 'display_posts_shortcode_author', ' <span class="crawlomatic_display_author">by ' . get_the_author() . '</span>', $original_atts );
        }

        // Excerpt (+ optional "read more" link)
        if ( $include_excerpt ) {
            if ( $excerpt_length || $excerpt_more || $excerpt_more_link ) {
                $length = $excerpt_length ? $excerpt_length : apply_filters( 'excerpt_length', 55 );
                $more   = $excerpt_more ? $excerpt_more : apply_filters( 'excerpt_more', '' );
                $more   = $excerpt_more_link ? ' <a href="' . get_permalink() . '">' . esc_html( $more ) . '</a>' : ' ' . esc_html( $more );
                if ( has_excerpt() && apply_filters( 'display_posts_shortcode_full_manual_excerpt', false ) ) {
                    $excerpt = $post->post_excerpt . $more;
                } elseif ( has_excerpt() ) {
                    $excerpt = crawlomatic_wp_trim_words( strip_shortcodes( $post->post_excerpt ), $length, $more );
                } else {
                    $excerpt = crawlomatic_wp_trim_words( strip_shortcodes( $post->post_content ), $length, $more );
                }
            } else {
                $excerpt = get_the_excerpt();
            }
            $excerpt = ' <br/><br/> <span class="crawlomatic_display_excerpt cr_display_excerpt_adv">' . $excerpt . '</span>';
            if ( $read_more_text != '' ) {
                $read_more_href = $has_source_url ? esc_url( $source_url ) : get_permalink();
                $excerpt .= '<br/><a href="' . $read_more_href . '"><span class="crawlomatic_display_excerpt cr_display_excerpt_adv">' . esc_html( $read_more_text ) . '</span></a>';
            }
        }

        // Full content; the temporary filter disables this shortcode inside the
        // rendered content so it cannot nest into itself.
        if ( $include_content ) {
            add_filter( 'shortcode_atts_display-posts', 'crawlomatic_display_posts_off', 10, 3 );
            $content = '<div class="' . implode( ' ', $content_class ) . '">' . apply_filters( 'the_content', get_the_content() ) . '</div>';
            remove_filter( 'shortcode_atts_display-posts', 'crawlomatic_display_posts_off', 10, 3 );
        }

        // Term list for the requested taxonomy
        $category_display_text = '';
        if ( $category_display && is_object_in_taxonomy( get_post_type(), $category_display ) ) {
            $terms       = get_the_terms( get_the_ID(), $category_display );
            $term_output = array();
            // get_the_terms() yields false / WP_Error when there is nothing to list.
            if ( is_array( $terms ) ) {
                foreach ( $terms as $term ) {
                    $term_link = get_term_link( $term, $category_display );
                    if ( is_wp_error( $term_link ) ) {
                        continue;
                    }
                    // The term name is visible text, so it is HTML-escaped (not URL-escaped).
                    $term_output[] = '<a href="' . $term_link . '">' . esc_html( $term->name ) . '</a>';
                }
            }
            $category_display_text = ' <span class="category-display"><span class="category-display-label">' . esc_html( $category_label ) . '</span> ' . implode( ', ', $term_output ) . '</span>';
            $category_display_text = apply_filters( 'display_posts_shortcode_category_display', $category_display_text );
        }

        $class  = array( 'listing-item' );
        $class  = array_map( 'sanitize_html_class', apply_filters( 'display_posts_shortcode_post_class', $class, $post, $listing, $original_atts ) );
        $output = '<br/><' . esc_html( $inner_wrapper ) . ' class="' . implode( ' ', $class ) . '">' . $image . $title . $date . $author . $category_display_text . $excerpt . $content . '</' . esc_html( $inner_wrapper ) . '><br/><br/><hr class="cr_hr_dot"/>';
        $inner .= apply_filters( 'display_posts_shortcode_output', $output, $original_atts, $image, $title, $date, $excerpt, $inner_wrapper, $content, $class );
    }
    wp_reset_postdata();
    wp_suspend_cache_addition( false );

    // ---- Assemble the final markup -------------------------------------------
    $open   = apply_filters( 'display_posts_shortcode_wrapper_open', '<' . $wrapper . $wrapper_class . $wrapper_id . '>', $original_atts );
    $close  = apply_filters( 'display_posts_shortcode_wrapper_close', '</' . esc_html( $wrapper ) . '>', $original_atts );
    $return = $open;
    if ( $shortcode_title ) {
        $title_tag = apply_filters( 'display_posts_shortcode_title_tag', 'h2', $original_atts );
        $return   .= '<' . esc_html( $title_tag ) . ' class="display-posts-title">' . esc_html( $shortcode_title ) . '</' . esc_html( $title_tag ) . '>' . "\n";
    }
    $return .= $inner . $close;

    // Inline CSS carrying the colour / font-size attributes.
    $reg_css_code = '.cr_hr_dot{border-top: dotted 1px;}.cr_display_span{font-size:' . esc_html( $title_font_size ) . ';color:' . esc_html( $title_color ) . ' !important;}.cr_display_excerpt_adv{font-size:' . esc_html( $excerpt_font_size ) . ';color:' . esc_html( $excerpt_color ) . ' !important;}';
    wp_register_style( 'crawlomatic-display-style', false );
    wp_enqueue_style( 'crawlomatic-display-style' );
    wp_add_inline_style( 'crawlomatic-display-style', $reg_css_code );
    return $return;
}
/**
 * Normalises a date ("Y-m-d") or time ("H:i[:s]") shortcode attribute into
 * the segment array used to build a date query.
 *
 * - Empty input or an unknown $type yields an empty array.
 * - With $accepts_string = true, a value that contains a space or has no dash
 *   and that strtotime() can parse is returned unchanged as a string.
 * - A date with fewer than three dash-separated parts yields year/month/day = 1;
 *   a time with a part count other than 2 or 3 yields hour/minute/second = 0.
 * - Out-of-range segments fall back to 1 (date) or 0 (time). Minutes and
 *   seconds are limited to 0-59.
 *
 * The segment array is passed through the
 * 'display_posts_shortcode_sanitized_segments' filter before being returned.
 *
 * @param string $date_time      Raw attribute value.
 * @param string $type           'date' or 'time'.
 * @param bool   $accepts_string Whether a free-form strtotime() string is allowed.
 * @return array|string Segment array, or the original string (see above).
 */
function crawlomatic_sanitize_date_time( $date_time, $type = 'date', $accepts_string = false ) {
    if ( empty( $date_time ) || ! in_array( $type, array( 'date', 'time' ), true ) ) {
        return array();
    }

    // Free-form strings ("2 days ago", "January 1 2020") are passed through untouched.
    if (
        true === $accepts_string
        && ( false !== strpos( $date_time, ' ' ) || false === strpos( $date_time, '-' ) )
        && false !== strtotime( $date_time )
    ) {
        return $date_time;
    }

    $parts = array_map( 'absint', explode( 'date' === $type ? '-' : ':', $date_time ) );

    if ( 'date' === $type ) {
        $year = $month = $day = 1;
        if ( count( $parts ) >= 3 ) {
            list( $year, $month, $day ) = $parts;
            $year  = ( $year >= 1 && $year <= 9999 ) ? $year : 1;
            $month = ( $month >= 1 && $month <= 12 ) ? $month : 1;
            $day   = ( $day >= 1 && $day <= 31 ) ? $day : 1;
        }
        $segments = array(
            'year'  => $year,
            'month' => $month,
            'day'   => $day
        );
    } else {
        $hour = $minute = $second = 0;
        $part_count = count( $parts );
        if ( 3 === $part_count ) {
            list( $hour, $minute, $second ) = $parts;
            $hour   = ( $hour >= 0 && $hour <= 23 ) ? $hour : 0;
            $minute = ( $minute >= 0 && $minute <= 59 ) ? $minute : 0;
            $second = ( $second >= 0 && $second <= 59 ) ? $second : 0;
        } elseif ( 2 === $part_count ) {
            list( $hour, $minute ) = $parts;
            $hour   = ( $hour >= 0 && $hour <= 23 ) ? $hour : 0;
            $minute = ( $minute >= 0 && $minute <= 59 ) ? $minute : 0;
        }
        $segments = array(
            'hour'   => $hour,
            'minute' => $minute,
            'second' => $second
        );
    }
    return apply_filters( 'display_posts_shortcode_sanitized_segments', $segments, $date_time, $type );
}
/**
 * 'shortcode_atts_display-posts' filter callback that sets the
 * 'display_posts_off' attribute on the parsed shortcode attributes.
 *
 * The value defaults to true and can be changed through the
 * 'display_posts_shortcode_inception_override' filter.
 *
 * @param array $out   Parsed shortcode attributes.
 * @param array $pairs Supported attributes and their defaults (unused).
 * @param array $atts  Raw shortcode attributes (unused).
 * @return array $out with 'display_posts_off' set.
 */
function crawlomatic_display_posts_off( $out, $pairs, $atts ) {
    $switch_off = apply_filters( 'display_posts_shortcode_inception_override', true );
    $out['display_posts_off'] = $switch_off;

    return $out;
}
// Register the [crawlomatic-list-posts] shortcode.
add_shortcode( 'crawlomatic-list-posts', 'crawlomatic_list_posts' );
/**
 * Handler for the [crawlomatic-list-posts] shortcode: renders a plain <ul> of
 * linked post titles for posts carrying the 'crawlomatic_parent_rule' meta key.
 *
 * Supported attributes: type, order, orderby, posts (number of posts),
 * category and ruleid. 'posts_per_page' is accepted but the query size is
 * taken from 'posts'.
 *
 * Output buffering is only started once the query is known to have results,
 * so the empty-result path no longer leaves an open buffer behind.
 *
 * @param array|string $atts Raw shortcode attributes.
 * @return string|null List markup, '' when nothing matches, null in the admin area.
 */
function crawlomatic_list_posts( $atts ) {
    if ( is_admin() ) {
        return;
    }
    $settings = shortcode_atts( array(
        'type'           => 'any',
        'order'          => 'ASC',
        'orderby'        => 'title',
        'posts'          => 50,
        'posts_per_page' => 50,
        'category'       => '',
        'ruleid'         => ''
    ), $atts );

    $query = new WP_Query( array(
        'post_type'      => $settings['type'],
        'order'          => $settings['order'],
        'orderby'        => $settings['orderby'],
        'posts_per_page' => $settings['posts'],
        'category_name'  => $settings['category'],
        'meta_key'       => 'crawlomatic_parent_rule',
        'meta_value'     => $settings['ruleid']
    ) );
    if ( ! $query->have_posts() ) {
        return '';
    }

    // the_ID() and post_class() echo, so the markup is captured through a buffer.
    ob_start();
    ?>
<ul class="clothes-listing">
<?php while ( $query->have_posts() ) : $query->the_post(); ?>
<li id="post-<?php the_ID(); ?>" <?php post_class(); ?>>
<a href="<?php echo esc_url( get_permalink() ); ?>"><?php echo esc_html( get_the_title() ); ?></a>
</li>
<?php endwhile;
    wp_reset_postdata(); ?>
</ul>
<?php
    return ob_get_clean();
}
// Run the cleanup routine when this plugin file is deactivated.
register_deactivation_hook(__FILE__, 'crawlomatic_my_deactivation');
/**
 * Deactivation callback: unschedules the 'crawlomaticaction' and
 * 'crawlomaticactionclear' events and resets both "running" lists
 * to an empty array (stored without autoload).
 *
 * @return void
 */
function crawlomatic_my_deactivation()
{
    foreach (array('crawlomaticaction', 'crawlomaticactionclear') as $scheduled_hook) {
        wp_clear_scheduled_hook($scheduled_hook);
    }
    foreach (array('crawlomatic_running_list', 'crawlomatic_auto_running_list') as $list_option) {
        update_option($list_option, array(), false);
    }
}
// Map the plugin's custom action hooks to their handlers. The first two are the
// scheduled hooks that crawlomatic_my_deactivation() clears on deactivation.
add_action('crawlomaticaction', 'crawlomatic_cron');
add_action('crawlomaticactionclear', 'crawlomatic_auto_clear_log');
add_action('crawlomaticactionupdate', 'crawlomatic_auto_update_posts' );
/**
 * Removes one entry from the 'crawlomatic_auto_running_list' option.
 *
 * The cached copy of the option is dropped first so the list is re-read
 * rather than served from the object cache. When the stored value is not an
 * array (e.g. the option does not exist) or the entry is not found, the
 * failure is logged and nothing is written.
 *
 * @param mixed $param Entry to remove (compared loosely, as array_search() does by default).
 * @return void
 */
function crawlomatic_clearFromAutoList($param)
{
    $GLOBALS['wp_object_cache']->delete('crawlomatic_auto_running_list', 'options');
    $running = get_option('crawlomatic_auto_running_list');

    // get_option() returns false for a missing option; array_search() would
    // throw a TypeError on PHP 8 if handed a non-array.
    $key = is_array($running) ? array_search($param, $running) : false;

    if ($key !== false) {
        unset($running[$key]);
        update_option('crawlomatic_auto_running_list', $running, false);
    } else {
        crawlomatic_log_to_file('[RunningList][ERROR] Failed to delete key from running list: ' . esc_html($param));
    }
}
/**
 * Returns the HTTP status code a URL answers with, using a body-less request.
 *
 * Flow:
 *  1. Work out the delay between requests in milliseconds: the plugin-wide
 *     'request_delay' setting, overridden by $request_delay when that parses.
 *     Either may be a single number or "min,max" (a random value in that range).
 *  2. If a delay is configured, sleep for whatever is left of it since the time
 *     stored in the 'crawlomatic_last_time' option.
 *  3. Probe the URL with cURL. When open_basedir is set, redirects are followed
 *     manually (up to 10 hops) instead of through CURLOPT_FOLLOWLOCATION.
 *  4. If cURL is unavailable or produced no code, fall back to get_headers().
 *
 * NOTE(review): TLS peer/host verification is disabled for the probe
 * (CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST = false), so a status code
 * can be obtained from hosts with invalid certificates. Kept as in the
 * original; confirm this is acceptable for the deployment.
 *
 * @param string $url           URL to probe.
 * @param string $request_delay Per-call delay ("ms" or "min,max"); '' to use the global setting.
 * @return int HTTP status code, or 0 when none could be determined.
 */
function crawlomatic_get_http_code($url, $request_delay)
{
    $code = 0;
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);

    // Turns "123" or "100,500" into an integer number of milliseconds; '' when unusable.
    $parse_delay = static function ($raw) {
        if (!is_scalar($raw)) {
            return '';
        }
        $raw = trim((string) $raw);
        if ($raw === '') {
            return '';
        }
        if (strpos($raw, ',') !== false) {
            $bounds = explode(',', $raw);
            $low    = trim($bounds[0]);
            $high   = trim($bounds[1]);
            if (is_numeric($low) && is_numeric($high)) {
                return rand(intval($low), intval($high));
            }
            return '';
        }
        return is_numeric($raw) ? intval($raw) : '';
    };

    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay'])) {
        $delay = $parse_delay($crawlomatic_Main_Settings['request_delay']);
    }
    // A valid per-call value wins; an unparsable one leaves the global setting in place.
    $call_delay = $parse_delay($request_delay);
    if ($call_delay !== '') {
        $delay = $call_delay;
    }
    $throttled = ($delay != '' && is_numeric($delay));

    if ($throttled) {
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false) {
            // Remaining wait in microseconds: (seconds since last request -> ms, plus delay ms) * 1000.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0) {
                if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                    crawlomatic_log_to_file('Delay between requests set(1), waiting ' . ($sleep_time / 1000) . ' ms');
                }
                // Longer waits are skipped rather than slept through.
                if ($sleep_time < 21600000) {
                    usleep($sleep_time);
                }
            }
        }
    }

    if (function_exists('curl_init')) {
        $max_redirects = 10;
        $ch = curl_init();
        if ($ch !== false) {
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_REFERER, $url);
            curl_setopt($ch, CURLOPT_HEADER, true);
            curl_setopt($ch, CURLOPT_NOBODY, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 90);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_FORBID_REUSE, false);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_COOKIEJAR, get_temp_dir() . 'crawlomaticcookie.txt');
            curl_setopt($ch, CURLOPT_COOKIEFILE, get_temp_dir() . 'crawlomaticcookie.txt');
            curl_setopt($ch, CURLOPT_AUTOREFERER, true);

            $finish = false;
            if (ini_get('open_basedir') == '') {
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                curl_setopt($ch, CURLOPT_MAXREDIRS, $max_redirects);
            } else {
                // Redirects are followed by hand on a copy of the handle.
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
                $rch = curl_copy_handle($ch);
                do {
                    curl_setopt($rch, CURLOPT_URL, $url);
                    curl_setopt($rch, CURLOPT_REFERER, $url);
                    $header = curl_exec($rch);
                    if (curl_errno($rch)) {
                        $code = 0;
                    } else {
                        $code = curl_getinfo($rch, CURLINFO_HTTP_CODE);
                        $location = '';
                        // Header names are case-insensitive (HTTP/2 sends them lower-case).
                        if (
                            in_array($code, array(301, 302, 303, 307, 308), true)
                            && is_string($header)
                            && preg_match('/^Location:\s*(.*?)\s*$/mi', $header, $matches)
                        ) {
                            $location = $matches[1];
                        }
                        if ($location !== '') {
                            $url = $location;
                        } else {
                            // Final response, or a redirect without a usable target: keep this code.
                            $finish = true;
                        }
                    }
                } while ($finish == false && --$max_redirects);
                curl_close($rch);
                curl_setopt($ch, CURLOPT_URL, $url);
                curl_setopt($ch, CURLOPT_REFERER, $url);
            }
            if ($finish === false) {
                curl_exec($ch);
                $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            }
            if ($throttled) {
                update_option('crawlomatic_last_time', time());
            }
            curl_close($ch);
        }
    }

    if ($code === 0) {
        // Silence get_headers() warnings, then restore the level that was in
        // effect instead of forcing E_ALL on the rest of the request.
        $previous_level  = error_reporting(0);
        $responseHeaders = get_headers($url, true);
        error_reporting($previous_level);
        if ($responseHeaders !== false && isset($responseHeaders[0])) {
            preg_match('/\d{3}/', $responseHeaders[0], $matches);
            if (isset($matches[0])) {
                // Cast so both code paths return an integer.
                $code = (int) $matches[0];
            }
        }
        if ($throttled) {
            update_option('crawlomatic_last_time', time());
        }
    }
    return $code;
}
/**
 * Rewrites lazy-loaded <img> tags so their real image URL sits in `src`.
 *
 * Detection order:
 *  1. A well-known lazy attribute present anywhere in the content
 *     (data-src, data-lazy-src, data-ezsrc, data-pagespeed-lazy-src).
 *  2. Otherwise a heuristic: lazy loading is suspected when the image tags
 *     mention "lazy", or when more than 39% of the src values are duplicates.
 *     In that case the first remaining attribute whose value contains a "/"
 *     (after dropping src, common attributes and very short values) is used.
 *
 * For every image carrying the detected attribute, the existing src is
 * dropped and the lazy attribute is renamed to src.
 *
 * @param string $cont HTML content.
 * @return string Content with lazy images fixed, or the input unchanged when
 *                no images or no lazy attribute were found.
 */
function crawlomatic_lazy_loading_auto_fix($cont)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);

    preg_match_all('{<img .*?>}s', $cont, $img_tags);
    if (count($img_tags[0]) < 1) {
        return $cont;
    }

    // Step 1: well-known lazy attributes, first hit wins.
    $lazy_attr = '';
    $known_attrs = array('data-src', 'data-lazy-src', 'data-ezsrc', 'data-pagespeed-lazy-src');
    foreach ($known_attrs as $candidate) {
        if (stristr($cont, ' ' . $candidate . '=')) {
            $lazy_attr = $candidate;
            break;
        }
    }

    // Step 2: heuristic detection when nothing well-known was found.
    if ($lazy_attr === '') {
        $all_tags = implode(' ', $img_tags[0]);
        $looks_lazy = false;
        if (stristr($all_tags, 'lazy')) {
            $looks_lazy = true;
        } else {
            // Many identical src values suggest a shared placeholder image.
            preg_match_all('{ src[\s]?=[\s]?["|\'](.*?)["|\']}', $all_tags, $src_hits);
            $total_srcs = count($src_hits[0]);
            $distinct_srcs = count(array_unique($src_hits[1]));
            $duplicate_share = 0;
            if ($total_srcs != 0) {
                $duplicate_share = ($total_srcs - $distinct_srcs) * 100 / $total_srcs;
            }
            if ($duplicate_share > 39) {
                $looks_lazy = true;
            }
        }
        if ($looks_lazy) {
            // Strip src, then common attribute names, then short-valued attributes.
            $stripped = preg_replace('{ src[\s]?=[\s]?["|\'].*?["|\']}', ' ', $all_tags);
            $ignored_names = array(' alt', ' class', ' id', ' title');
            $keep_srcset = isset($crawlomatic_Main_Settings['keep_srcset']) && $crawlomatic_Main_Settings['keep_srcset'] == 'on';
            if (!$keep_srcset) {
                $ignored_names[] = ' srcset';
                $ignored_names[] = ' data-srcset';
            }
            $stripped = str_replace($ignored_names, ' ', $stripped);
            $stripped = preg_replace('{ [\w|-]*?[\s]?=[\s]?["|\'].{1,9}?["|\']}s', ' ', $stripped);
            // Whatever attribute still holds a path-like value is the candidate.
            preg_match_all('{( [\w|-]*?)[\s]?=[\s]?["|\'][^",]*?/[^",]*?["|\']}', $stripped, $attr_hits);
            $attr_names = array_unique($attr_hits[1]);
            if (isset($attr_names[0])) {
                $lazy_attr = $attr_names[0];
            }
        }
    }

    $lazy_attr = trim($lazy_attr);
    if ($lazy_attr == 'src' || $lazy_attr == '') {
        return $cont;
    }
    if (!stristr($cont, $lazy_attr)) {
        return $cont;
    }

    // Step 3: swap the attribute into src on every image that carries it.
    foreach ($img_tags[0] as $original_tag) {
        if (!stristr($original_tag, $lazy_attr)) {
            continue;
        }
        $fixed_tag = str_replace(' src=', ' previous-src=', $original_tag);
        $fixed_tag = preg_replace('{ previous-src=[\'|"].*?[\'|"] }', ' ', $fixed_tag);
        $fixed_tag = str_replace(' ' . $lazy_attr, ' src', $fixed_tag);
        $cont = str_replace($original_tag, $fixed_tag, $cont);
    }
    return $cont;
}
/**
 * Unwraps Google redirect / Google News links to the article URL they point to.
 *
 * Handled forms:
 *  - news.google... links carrying a "url=" parameter: everything after the
 *    first "url=" is used.
 *  - news.google... "/articles/" links: resolved through
 *    crawlomatic_get_google_news_link(); kept as-is when that returns false.
 *  - https://www.google.com/url?...&url=TARGET&...: TARGET is used.
 *
 * Any other URL is returned unchanged.
 *
 * @param string $url Link to unwrap.
 * @return string The target URL, or the input when nothing could be extracted.
 */
function crawlomatic_fix_google_links($url)
{
    if (stristr($url, 'news.google') && stristr($url, 'url=')) {
        $urlParts = explode('url=', $url);
        // The detection above is case-insensitive but explode() is not, so the
        // second part may be missing (e.g. "URL=").
        if (isset($urlParts[1])) {
            $url = $urlParts[1];
        }
    } elseif (stristr($url, 'news.google') && stristr($url, '/articles/')) {
        $new_link = crawlomatic_get_google_news_link($url);
        if ($new_link !== false) {
            $url = $new_link;
        }
    } elseif (stristr($url, 'https://www.google.com/url?')) {
        // preg_match() stores the capture as a plain string in $urlMatches[1];
        // indexing it again with [0] would only yield its first character.
        if (preg_match('{https:\/\/www\.google\.com\/url\?(?:.*?)&url=([^&]*?)&}', $url, $urlMatches)
            && trim($urlMatches[1]) != ''
        ) {
            $url = $urlMatches[1];
        }
    }
    return $url;
}
/**
 * Fetches a Google News article page and extracts the publisher URL from it.
 *
 * The downloaded HTML is checked against a list of known markers in order;
 * the first capture found is returned.
 *
 * NOTE(review): TLS peer/host verification is disabled for this request
 * (kept as in the original) — confirm this is acceptable.
 *
 * @param string $link Google News article link.
 * @return string|false The extracted URL; the unchanged $link when no marker
 *                      matched (this is logged); false when cURL is not
 *                      available or the request failed.
 */
function crawlomatic_get_google_news_link($link)
{
    // Same availability guard as crawlomatic_get_http_code(): without the cURL
    // extension curl_init() would be a fatal "undefined function" error.
    if (!function_exists('curl_init')) {
        return false;
    }
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, $link);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_REFERER, 'https://news.google.com/');
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 20);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: text/html'));
    $server_output = curl_exec($ch);
    if ($server_output === false) {
        return false;
    }

    // Tried in this order; capture group 1 is the target URL.
    $url_patterns = array(
        '{Opening <a href="([^"]*?)"}',
        '{jsname="(?:[^"]*?)" rel="nofollow">([^<]*?)<\/a>}i',
        '{data-n-au="([^<]*?)"}i',
        '{null,null,"(https?:\/\/[^"]*?)",null}i',
    );
    foreach ($url_patterns as $pattern) {
        if (preg_match($pattern, $server_output, $urlmatches) && isset($urlmatches[1])) {
            return $urlmatches[1];
        }
    }

    crawlomatic_log_to_file('Failed to find Google News final URL for: ' . $link);
    return $link;
}
function crawlomatic_auto_update_posts() {
$param = '0';
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
$GLOBALS['wp_object_cache']->delete( 'crawlomatic_auto_running_list', 'options' );
if (!get_option('crawlomatic_auto_running_list')) {
$running = array();
} else {
$running = get_option('crawlomatic_auto_running_list');
}
if (!empty($running)) {
if (in_array('auto_update', $running)) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Automatic updating already running, skipping running it this time.');
}
return;
}
}
$running[] = 'auto_update';
update_option('crawlomatic_auto_running_list', $running, false);
register_shutdown_function('crawlomatic_clear_flag_at_shutdown', 'auto_update', true);
if (isset($crawlomatic_Main_Settings['rule_timeout']) && $crawlomatic_Main_Settings['rule_timeout'] != '') {
$timeout = intval($crawlomatic_Main_Settings['rule_timeout']);
} else {
$timeout = 3600;
}
ini_set('memory_limit', '-1');
ini_set('default_socket_timeout', $timeout);
ini_set('safe_mode', 'Off');
ini_set('max_execution_time', $timeout);
ini_set('ignore_user_abort', 1);
ini_set('user_agent', crawlomatic_get_random_user_agent());
$auto = 1;
$updated = 0;
$woo_active = false;
if(!function_exists('is_plugin_active'))
{
include_once( ABSPATH . 'wp-admin/includes/plugin.php' );
}
if (is_plugin_active('woocommerce/woocommerce.php')) {
$woo_active = true;
}
try
{
$post_list = array();
$paged = 0;
if (isset($crawlomatic_Main_Settings['max_at_once']) && $crawlomatic_Main_Settings['max_at_once'] != '') {
$max_at_once = intval($crawlomatic_Main_Settings['max_at_once']);
}
else
{
$max_at_once = 50000;
}
if($max_at_once < 50000)
{
$postsPerPage = $max_at_once;
}
else
{
$postsPerPage = 50000;
}
do
{
$postOffset = $paged * $postsPerPage;
$query = array(
'post_status' => array(
'publish',
'private'
),
'numberposts' => $postsPerPage,
'meta_key' => 'crawlomatic_rule_id',
'fields' => 'ids',
'offset' => $postOffset
);
if (isset($crawlomatic_Main_Settings['author_id']) && $crawlomatic_Main_Settings['author_id'] != '') {
$query['author'] = $crawlomatic_Main_Settings['author_id'];
}
if (isset($crawlomatic_Main_Settings['author_name']) && $crawlomatic_Main_Settings['author_name'] != '') {
$query['author_name'] = $crawlomatic_Main_Settings['author_name'];
}
if (isset($crawlomatic_Main_Settings['category_name']) && $crawlomatic_Main_Settings['category_name'] != '') {
$query['category_name'] = $crawlomatic_Main_Settings['category_name'];
}
if (isset($crawlomatic_Main_Settings['tag_name']) && $crawlomatic_Main_Settings['tag_name'] != '') {
$query['tag'] = $crawlomatic_Main_Settings['tag_name'];
}
if (isset($crawlomatic_Main_Settings['post_id']) && $crawlomatic_Main_Settings['post_id'] != '') {
$postids = $crawlomatic_Main_Settings['post_id'];
$postids = explode(',', $postids);
$postids = array_map('trim', $postids);
$query['post__in'] = $postids;
}
if (isset($crawlomatic_Main_Settings['post_name']) && $crawlomatic_Main_Settings['post_name'] != '') {
$query['name'] = $crawlomatic_Main_Settings['post_name'];
}
if (isset($crawlomatic_Main_Settings['page_id']) && $crawlomatic_Main_Settings['page_id'] != '') {
$query['page_id'] = $crawlomatic_Main_Settings['page_id'];
}
if (isset($crawlomatic_Main_Settings['pagename']) && $crawlomatic_Main_Settings['pagename'] != '') {
$query['pagename'] = $crawlomatic_Main_Settings['pagename'];
}
if (isset($crawlomatic_Main_Settings['post_parent']) && $crawlomatic_Main_Settings['post_parent'] != '') {
$query['post_parent'] = $crawlomatic_Main_Settings['post_parent'];
}
if (isset($crawlomatic_Main_Settings['type_post']) && $crawlomatic_Main_Settings['type_post'] != '') {
$query['post_type'] = array_map('trim', explode(',', $crawlomatic_Main_Settings['type_post']));
}
else
{
$query['post_type'] = 'any';
}
if (isset($crawlomatic_Main_Settings['search_query']) && $crawlomatic_Main_Settings['search_query'] != '') {
$query['s'] = $crawlomatic_Main_Settings['search_query'];
}
if (isset($crawlomatic_Main_Settings['year']) && $crawlomatic_Main_Settings['year'] != '') {
$query['year'] = $crawlomatic_Main_Settings['year'];
}
if (isset($crawlomatic_Main_Settings['month']) && $crawlomatic_Main_Settings['month'] != '') {
$query['monthnum'] = $crawlomatic_Main_Settings['month'];
}
if (isset($crawlomatic_Main_Settings['day']) && $crawlomatic_Main_Settings['day'] != '') {
$query['day'] = $crawlomatic_Main_Settings['day'];
}
if (isset($crawlomatic_Main_Settings['featured_image']) && $crawlomatic_Main_Settings['featured_image'] != 'any') {
if($crawlomatic_Main_Settings['featured_image'] == 'with')
{
$query['meta_query'] = array(
array(
'key' => '_thumbnail_id',
'compare' => 'EXISTS'
)
);
}
elseif($crawlomatic_Main_Settings['featured_image'] == 'without')
{
$query['meta_query'] = array(
array(
'key' => '_thumbnail_id',
'value' => '?',
'compare' => 'NOT EXISTS'
)
);
}
}
$got_me = get_posts($query);
$post_list = array_merge($post_list, $got_me);
$paged++;
}while(!empty($got_me) && count($post_list) < $max_at_once);
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Found ' . count($post_list) . ' items to be auto updated.');
}
$rules = get_option('crawlomatic_rules_list');
if (!empty($rules))
{
shuffle($post_list);
wp_suspend_cache_addition(true);
foreach ($post_list as $post)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Running auto updating for post ID: ' . $post);
}
$rerun_failed = get_post_meta($post, 'crawlomatic_rerun_failed', true);
if($rerun_failed === '1')
{
continue;
}
$no_update = get_post_meta($post, 'crawlomatic_no_autoupdate', true);
if($no_update === '1')
{
continue;
}
$rule_unique_id = get_post_meta($post, 'crawlomatic_rule_id', true);
if(!isset($rule_unique_id) || $rule_unique_id == '')
{
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] User generated post detected (type not set): ' . $post . '!');
}
continue;
}
$crawlomatic_post_url = get_post_meta($post, 'crawlomatic_post_url', true);
if(isset($crawlomatic_post_url) && $crawlomatic_post_url != '')
{
$found_cont = false;
$cont = 0;
foreach ($rules as $request => $bundle[]) {
$bundle_values = array_values($bundle);
$myValues = $bundle_values[$cont];
$array_my_values = array_values($myValues);for($iji=0;$iji<count($array_my_values);++$iji){if(is_string($array_my_values[$iji])){$array_my_values[$iji]=stripslashes($array_my_values[$iji]);}}
$ids = isset($array_my_values[0]) ? $array_my_values[0] : '';
$schedule = isset($array_my_values[1]) ? $array_my_values[1] : '';
$active = isset($array_my_values[2]) ? $array_my_values[2] : '';
$last_run = isset($array_my_values[3]) ? $array_my_values[3] : '';
$max = isset($array_my_values[4]) ? $array_my_values[4] : '';
$post_status = isset($array_my_values[5]) ? $array_my_values[5] : '';
$post_type = isset($array_my_values[6]) ? $array_my_values[6] : '';
$post_user_name = isset($array_my_values[7]) ? $array_my_values[7] : '';
$item_create_tag = isset($array_my_values[8]) ? $array_my_values[8] : '';
$default_category = isset($array_my_values[9]) ? $array_my_values[9] : '';
$auto_categories = isset($array_my_values[10]) ? $array_my_values[10] : '';
$can_create_tag = isset($array_my_values[11]) ? $array_my_values[11] : '';
$enable_comments = isset($array_my_values[12]) ? $array_my_values[12] : '';
$featured_image = isset($array_my_values[13]) ? $array_my_values[13] : '';
$image_url = isset($array_my_values[14]) ? $array_my_values[14] : '';
$post_title = isset($array_my_values[15]) ? htmlspecialchars_decode($array_my_values[15]) : '';
$post_content = isset($array_my_values[16]) ? htmlspecialchars_decode($array_my_values[16]) : '';
$enable_pingback = isset($array_my_values[17]) ? $array_my_values[17] : '';
$post_format = isset($array_my_values[18]) ? $array_my_values[18] : '';
$only_text = isset($array_my_values[19]) ? $array_my_values[19] : '';
$type = isset($array_my_values[20]) ? $array_my_values[20] : '';
$expre = isset($array_my_values[21]) ? $array_my_values[21] : '';
$get_css = isset($array_my_values[22]) ? $array_my_values[22] : '';
$banned_words = isset($array_my_values[23]) ? $array_my_values[23] : '';
$required_words = isset($array_my_values[24]) ? $array_my_values[24] : '';
$strip_by_id = isset($array_my_values[25]) ? $array_my_values[25] : '';
$strip_by_class = isset($array_my_values[26]) ? $array_my_values[26] : '';
$encoding = isset($array_my_values[27]) ? $array_my_values[27] : 'NO_CHANGE';
$limit_word_count = isset($array_my_values[28]) ? $array_my_values[28] : '';
$translate = isset($array_my_values[29]) ? $array_my_values[29] : 'disabled';
$seed_pag_type = isset($array_my_values[30]) ? $array_my_values[30] : '';
$strip_images = isset($array_my_values[31]) ? $array_my_values[31] : '';
$remove_default = isset($array_my_values[32]) ? $array_my_values[32] : '';
$try_rule_unique_id= isset($array_my_values[33]) ? $array_my_values[33] : '';
$read_more = isset($array_my_values[34]) ? $array_my_values[34] : '';
$skip_og = isset($array_my_values[35]) ? $array_my_values[35] : '';
$remove_cats = isset($array_my_values[36]) ? $array_my_values[36] : '';
$auto_delete = isset($array_my_values[37]) ? $array_my_values[37] : '';
$skip_post_content= isset($array_my_values[38]) ? $array_my_values[38] : '';
$content_percent = isset($array_my_values[39]) ? $array_my_values[39] : '';
$custom_fields = isset($array_my_values[40]) ? $array_my_values[40] : '';
$source_lang = isset($array_my_values[41]) ? $array_my_values[41] : '';
$strip_by_regex = isset($array_my_values[42]) ? $array_my_values[42] : '';
$replace_regex = isset($array_my_values[43]) ? $array_my_values[43] : '';
$no_external = isset($array_my_values[44]) ? $array_my_values[44] : '';
$title_type = isset($array_my_values[45]) ? $array_my_values[45] : '';
$title_expre = isset($array_my_values[46]) ? $array_my_values[46] : '';
$image_type = isset($array_my_values[47]) ? $array_my_values[47] : '';
$image_expre = isset($array_my_values[48]) ? $array_my_values[48] : '';
$date_type = isset($array_my_values[49]) ? $array_my_values[49] : '';
$date_expre = isset($array_my_values[50]) ? $array_my_values[50] : '';
$cat_type = isset($array_my_values[51]) ? $array_my_values[51] : '';
$cat_expre = isset($array_my_values[52]) ? $array_my_values[52] : '';
$max_depth = isset($array_my_values[53]) ? $array_my_values[53] : '';
$custom_cookies = isset($array_my_values[54]) ? $array_my_values[54] : '';
$only_class = isset($array_my_values[55]) ? $array_my_values[55] : '';
$only_id = isset($array_my_values[56]) ? $array_my_values[56] : '';
$no_source = isset($array_my_values[57]) ? $array_my_values[57] : '';
$seed_type = isset($array_my_values[58]) ? $array_my_values[58] : '';
$seed_expre = isset($array_my_values[59]) ? $array_my_values[59] : '';
$crawled_type = isset($array_my_values[60]) ? $array_my_values[60] : '';
$crawled_expre = isset($array_my_values[61]) ? $array_my_values[61] : '';
$paged_crawl_str = isset($array_my_values[62]) ? $array_my_values[62] : '';
$paged_crawl_type = isset($array_my_values[63]) ? $array_my_values[63] : '';
$max_paged_depth = isset($array_my_values[64]) ? $array_my_values[64] : '';
$custom_user_agent= isset($array_my_values[65]) ? $array_my_values[65] : '';
$seed_pag_expre = isset($array_my_values[66]) ? $array_my_values[66] : '';
$price_type = isset($array_my_values[67]) ? $array_my_values[67] : '';
$price_expre = isset($array_my_values[68]) ? $array_my_values[68] : '';
$parent_category_id= isset($array_my_values[69]) ? $array_my_values[69] : '';
$cat_sep = isset($array_my_values[70]) ? $array_my_values[70] : '';
$date_index = isset($array_my_values[71]) ? $array_my_values[71] : '';
$keep_source = isset($array_my_values[72]) ? $array_my_values[72] : '';
$use_proxy = isset($array_my_values[73]) ? $array_my_values[73] : '';
$use_phantom = isset($array_my_values[74]) ? $array_my_values[74] : '';
$custom_crawling_expre = isset($array_my_values[75]) ? $array_my_values[75] : '';
$custom_tax = isset($array_my_values[76]) ? $array_my_values[76] : '';
$user_pass = isset($array_my_values[77]) ? $array_my_values[77] : '';
$strip_by_tag = isset($array_my_values[78]) ? $array_my_values[78] : '';
$crawl_exclude = isset($array_my_values[79]) ? $array_my_values[79] : '';
$royalty_free = isset($array_my_values[80]) ? $array_my_values[80] : '';
$max_results = isset($array_my_values[81]) ? $array_my_values[81] : '';
$strip_comma = isset($array_my_values[82]) ? $array_my_values[82] : '';
$update_existing = isset($array_my_values[83]) ? $array_my_values[83] : '';
$copy_images = isset($array_my_values[84]) ? $array_my_values[84] : '';
$allow_html_tags = isset($array_my_values[85]) ? $array_my_values[85] : '';
$strip_links = isset($array_my_values[86]) ? $array_my_values[86] : '';
$lazy_tag = isset($array_my_values[87]) ? $array_my_values[87] : '';
$reverse_crawl = isset($array_my_values[88]) ? $array_my_values[88] : '';
$replace_words = isset($array_my_values[89]) ? $array_my_values[89] : '';
$attach_screen = isset($array_my_values[90]) ? $array_my_values[90] : '';
$crawl_title_exclude = isset($array_my_values[91]) ? $array_my_values[91] : '';
$strip_by_regex_title = isset($array_my_values[92]) ? $array_my_values[92] : '';
$replace_regex_title = isset($array_my_values[93]) ? $array_my_values[93] : '';
$tag_type = isset($array_my_values[94]) ? $array_my_values[94] : '';
$tag_expre = isset($array_my_values[95]) ? $array_my_values[95] : '';
$tag_sep = isset($array_my_values[96]) ? $array_my_values[96] : '';
$phantom_wait = isset($array_my_values[97]) ? $array_my_values[97] : '';
$strip_by_xpath = isset($array_my_values[98]) ? $array_my_values[98] : '';
$skip_no_match = isset($array_my_values[99]) ? $array_my_values[99] : '';
$continue_search = isset($array_my_values[100]) ? $array_my_values[100] : '';
$author_type = isset($array_my_values[101]) ? $array_my_values[101] : '';
$author_expre = isset($array_my_values[102]) ? $array_my_values[102] : '';
$no_match_query = isset($array_my_values[103]) ? $array_my_values[103] : '';
$post_fields = isset($array_my_values[104]) ? $array_my_values[104] : '';
$limit_content_word_count = isset($array_my_values[105]) ? $array_my_values[105] : '';
$request_delay = isset($array_my_values[106]) ? $array_my_values[106] : '';
$no_spin = isset($array_my_values[107]) ? $array_my_values[107] : '';
$skip_no_image = isset($array_my_values[108]) ? $array_my_values[108] : '';
$limit_title_word_count = isset($array_my_values[109]) ? $array_my_values[109] : '';
$require_one = isset($array_my_values[110]) ? $array_my_values[110] : '';
$max_crawl = isset($array_my_values[111]) ? $array_my_values[111] : '';
$check_only_content = isset($array_my_values[112]) ? $array_my_values[112] : '';
$append_urls = isset($array_my_values[113]) ? $array_my_values[113] : '';
$scripter = isset($array_my_values[114]) ? $array_my_values[114] : '';
$strip_html_by_xpath= isset($array_my_values[115]) ? $array_my_values[115] : '';
$local_storage = isset($array_my_values[116]) ? $array_my_values[116] : '';
$wpml_lang = isset($array_my_values[117]) ? $array_my_values[117] : '';
$download_type = isset($array_my_values[118]) ? $array_my_values[118] : '';
$download_expre = isset($array_my_values[119]) ? $array_my_values[119] : '';
$regex_image = isset($array_my_values[120]) ? $array_my_values[120] : '';
$rule_description = isset($array_my_values[121]) ? $array_my_values[121] : '';
$gallery_type = isset($array_my_values[122]) ? $array_my_values[122] : '';
$gallery_expre = isset($array_my_values[123]) ? $array_my_values[123] : '';
$gallery_regex = isset($array_my_values[124]) ? $array_my_values[124] : '';
$replace_gallery_regex= isset($array_my_values[125]) ? $array_my_values[125] : '';
$excerpt_type = isset($array_my_values[126]) ? $array_my_values[126] : '';
$excerpt_expre = isset($array_my_values[127]) ? $array_my_values[127] : '';
$check_words = isset($array_my_values[128]) ? $array_my_values[128] : '';
$auto_captcha = isset($array_my_values[129]) ? $array_my_values[129] : '';
$enable_adblock = isset($array_my_values[130]) ? $array_my_values[130] : '';
$copy_types = isset($array_my_values[131]) ? $array_my_values[131] : '';
$scrape_variations= isset($array_my_values[132]) ? $array_my_values[132] : '';
$second_translate = isset($array_my_values[133]) ? $array_my_values[133] : 'disabled';
$run_raw_html = isset($array_my_values[134]) ? $array_my_values[134] : '';
$no_auto_update = isset($array_my_values[135]) ? $array_my_values[135] : '';
$clickelement = isset($array_my_values[136]) ? $array_my_values[136] : '';
$regular_price_type = isset($array_my_values[137]) ? $array_my_values[137] : '';
$regular_price_expre = isset($array_my_values[138]) ? $array_my_values[138] : '';
$strip_comma_regular = isset($array_my_values[139]) ? $array_my_values[139] : '';
$crawl_include = isset($array_my_values[140]) ? $array_my_values[140] : '';
$featured_replacer= isset($array_my_values[141]) ? $array_my_values[141] : '';
$copy_regex = isset($array_my_values[142]) ? $array_my_values[142] : '';
$variants_label = isset($array_my_values[143]) ? $array_my_values[143] : 'Variants';
$parent_id = isset($array_my_values[144]) ? $array_my_values[144] : '';
if($try_rule_unique_id == $rule_unique_id)
{
$found_cont = $cont;
break;
}
$cont++;
}
if($no_auto_update == '1')
{
continue;
}
if ($enable_comments == '1') {
$accept_comments = 'open';
}
else
{
$accept_comments = 'closed';
}
if (isset($crawlomatic_Main_Settings['cat_separator']) && $crawlomatic_Main_Settings['cat_separator'] !== '') {
if($cat_sep == '')
{
$cat_sep = $crawlomatic_Main_Settings['cat_separator'];
}
}
else
{
if($cat_sep == '')
{
$cat_sep = ',';
}
}
if($tag_sep == '')
{
$tag_sep = ',';
}
if($found_cont === false)
{
crawlomatic_log_to_file ('[AutoUpdater] Rule uniqueID no longer found in rules list: ' . esc_html($rule_unique_id));
continue;
}
$GLOBALS['crawl_done'] = false;
$GLOBALS['seed'] = true;
$items = array();
$items = crawlomatic_crawl_page($crawlomatic_post_url, 1, $skip_og, $skip_post_content, 1, '', '', $type, $expre, $title_type, $title_expre, $image_type, $image_expre, $date_type, $date_expre, $cat_type, $cat_expre, 2, $custom_cookies, '', '', '0', 'disabled', '', 'disabled', '', $paged_crawl_str, $paged_crawl_type, $max_paged_depth, $custom_user_agent, array(), true, $cat_sep, false, 'disabled', '', $price_type, $price_expre, true, $use_proxy, $use_phantom, false, $custom_crawling_expre, $user_pass, array(), array(), $encoding, $strip_comma, $reverse_crawl, $lazy_tag, $tag_type, $tag_expre, $tag_sep, $phantom_wait, $found_cont, $continue_search, $author_type, $author_expre, '', $post_fields, $request_delay, '0', '1', '0', $scripter, $local_storage, $download_type, $download_expre, $gallery_type, $gallery_expre, $excerpt_type, $excerpt_expre, 'both', $auto_captcha, $enable_adblock, false, $scrape_variations, $run_raw_html, $strip_by_regex, $replace_regex, '0', $clickelement, $regular_price_type, $regular_price_expre, $strip_comma_regular, array());
if($items == false || count($items) == 0)
{
if (isset($crawlomatic_Main_Settings['update_actions']) && ($crawlomatic_Main_Settings['update_actions'] == '1' || $crawlomatic_Main_Settings['update_actions'] == '3'))
{
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] Double checking URL for validity: ' . $crawlomatic_post_url);
}
$status_code = crawlomatic_get_http_code($crawlomatic_post_url, $request_delay);
if($status_code == 404)
{
$za_post = array();
$za_post['ID'] = $post;
$za_post['post_status'] = 'draft';
remove_filter('content_save_pre', 'wp_filter_post_kses');
remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');remove_filter('title_save_pre', 'wp_filter_kses');
$post_id = wp_update_post($za_post, true);
$updated++;
add_filter('content_save_pre', 'wp_filter_post_kses');
add_filter('content_filtered_save_pre', 'wp_filter_post_kses');add_filter('title_save_pre', 'wp_filter_kses');
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] URL was not rescraped successfully (' . $status_code . '): ' . $crawlomatic_post_url . ' - setting post ID as draft: ' . $post);
}
}
else
{
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] URL was not rescraped successfully, but status code is not 404, skipping it: ' . $crawlomatic_post_url . ' - status code: ' . $status_code);
}
}
}
else
{
crawlomatic_log_to_file ('[AutoUpdater] URL was not rescraped successfully: ' . $crawlomatic_post_url . ' post autoupdating will not run any more on it.');
update_post_meta($post, 'crawlomatic_rerun_failed', '1');
continue;
}
}
else
{
if (isset($crawlomatic_Main_Settings['update_actions']) && ($crawlomatic_Main_Settings['update_actions'] == '2' || $crawlomatic_Main_Settings['update_actions'] == '3'))
{
$items = array(0 => $items[0]);
$count = 1;
$init_date = time();
$skip_pcount = 0;
$skipped_pcount = 0;
if(isset($crawlomatic_Main_Settings['attr_text']) && $crawlomatic_Main_Settings['attr_text'] != '')
{
$img_attr = $crawlomatic_Main_Settings['attr_text'];
}
else
{
$img_attr = '';
}
if (isset($crawlomatic_Main_Settings['def_user']) && is_numeric($crawlomatic_Main_Settings['def_user'])) {
$dff_u = $crawlomatic_Main_Settings['def_user'];
}
else
{
$dff_u = '1';
}
$user_name_type = $post_user_name;
for($iloop = 0; $iloop < count($items); ++$iloop)
{
$css_cont = '';
$item_price_multi = $items[$iloop]['price'];
if($item_price_multi !== '' && $item_price_multi !== false)
{
if (isset($crawlomatic_Main_Settings['price_multiply']) && $crawlomatic_Main_Settings['price_multiply'] !== '')
{
$item_price_multi = round($item_price_multi * $crawlomatic_Main_Settings['price_multiply'], 2);
}
if (isset($crawlomatic_Main_Settings['price_add']) && $crawlomatic_Main_Settings['price_add'] !== '')
{
$item_price_multi = $item_price_multi + $crawlomatic_Main_Settings['price_add'];
}
if (isset($crawlomatic_Main_Settings['price_end']) && $crawlomatic_Main_Settings['price_end'] !== '')
{
$item_price_multi = floor($item_price_multi) + $crawlomatic_Main_Settings['price_end'];
}
if (isset($crawlomatic_Main_Settings['d_sep']) && $crawlomatic_Main_Settings['d_sep'] != '' && isset($crawlomatic_Main_Settings['t_sep']) && $crawlomatic_Main_Settings['t_sep'] != '')
{
$d_sep = $crawlomatic_Main_Settings['d_sep'];
$t_sep = $crawlomatic_Main_Settings['t_sep'];
$price_t = number_format($item_price_multi, 2, $d_sep, $t_sep);
if(!empty($price_t))
{
$item_price_multi = $price_t;
}
$price_x = number_format($items[$iloop]['price'], 2, $d_sep, $t_sep);
if(!empty($price_x))
{
$items[$iloop]['price'] = $price_x;
}
}
}
else
{
$item_price_multi = '';
}
$item_regular_price_multi = $items[$iloop]['regular_price'];
if($item_regular_price_multi !== '' && $item_regular_price_multi !== false && $item_regular_price_multi !== 0)
{
if (isset($crawlomatic_Main_Settings['price_multiply']) && $crawlomatic_Main_Settings['price_multiply'] !== '')
{
$item_regular_price_multi = round($item_regular_price_multi * $crawlomatic_Main_Settings['price_multiply'], 2);
}
if (isset($crawlomatic_Main_Settings['price_add']) && $crawlomatic_Main_Settings['price_add'] !== '')
{
$item_regular_price_multi = $item_regular_price_multi + $crawlomatic_Main_Settings['price_add'];
}
if (isset($crawlomatic_Main_Settings['price_end']) && $crawlomatic_Main_Settings['price_end'] !== '')
{
$item_regular_price_multi = floor($item_regular_price_multi) + $crawlomatic_Main_Settings['price_end'];
}
if (isset($crawlomatic_Main_Settings['d_sep']) && $crawlomatic_Main_Settings['d_sep'] != '' && isset($crawlomatic_Main_Settings['t_sep']) && $crawlomatic_Main_Settings['t_sep'] != '')
{
$d_sep = $crawlomatic_Main_Settings['d_sep'];
$t_sep = $crawlomatic_Main_Settings['t_sep'];
$price_t = number_format($item_regular_price_multi, 2, $d_sep, $t_sep);
if(!empty($price_t))
{
$item_regular_price_multi = $price_t;
}
$price_x = number_format($items[$iloop]['regular_price'], 2, $d_sep, $t_sep);
if(!empty($price_x))
{
$items[$iloop]['regular_price'] = $price_x;
}
}
}
else
{
$item_regular_price_multi = '';
}
if($item_price_multi == '' && $item_regular_price_multi != '')
{
$item_price_multi = $item_regular_price_multi;
}
$img_found = false;
$url = $items[$iloop]['url'];
$title = $items[$iloop]['title'];
if (!isset($crawlomatic_Main_Settings['unchanged_urls']) || $crawlomatic_Main_Settings['unchanged_urls'] != 'on')
{
$url1 = preg_replace('{#(.*)}s', '', $url);
if($url1 !== null)
{
$url = $url1;
}
}
if(isset($crawlomatic_Main_Settings['shortest_api']) && $crawlomatic_Main_Settings['shortest_api'] != '')
{
$short_url = crawlomatic_url_handle($url, $crawlomatic_Main_Settings['shortest_api']);
}
else
{
$short_url = $url;
}
$content = $items[$iloop]['content'];
if($limit_content_word_count != '' && is_numeric($limit_content_word_count))
{
$content = crawlomatic_custom_wp_trim_excerpt($content, $limit_content_word_count, $short_url, $read_more);
}
if (trim($lazy_tag) != '' && trim($lazy_tag) != 'src' && strstr($content, trim($lazy_tag)) !== false)
{
$lazy_tag = trim($lazy_tag);
$lazy_found = false;
preg_match_all('{<img .*?>}s', $content, $imgsMatchs);
if(isset($imgsMatchs[0][0]))
{
$imgsMatchs = $imgsMatchs[0];
foreach($imgsMatchs as $imgMatch){
if(stristr($imgMatch, $lazy_tag )){
$newImg = $imgMatch;
$newImg1 = preg_replace('{ src=".*?"}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
$newImg = str_replace($lazy_tag, 'src', $newImg);
$content = str_replace($imgMatch, $newImg, $content);
$lazy_found = true;
}
}
}
if($lazy_found == false)
{
$content = str_replace(trim($lazy_tag), 'src', $content);
}
preg_match_all('{<iframe .*?>}s', $content, $imgsMatchs);
if(isset($imgsMatchs[0][0]))
{
$imgsMatchs = $imgsMatchs[0];
foreach($imgsMatchs as $imgMatch){
if(stristr($imgMatch, $lazy_tag )){
$newImg = $imgMatch;
$newImg1 = preg_replace('{ src=["\'].*?[\'"]}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
if(stristr($lazy_tag, 'srcset') !== false)
{
$newImg1 = preg_replace('{\ssrcset=["\'].*?[\'"]}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
$newImg = str_replace($lazy_tag, 'srcset', $newImg);
preg_match_all('#srcset=[\'"](?:([^"\'\s,]+)\s*(?:\s+\d+[wx])(?:,\s*)?)+["\']#', $newImg, $imgma);
if(isset($imgma[1][0]))
{
$newImg1 = preg_replace('#<img#', '<img src="' . $imgma[1][0] . '"', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
}
}
else
{
$newImg = str_replace($lazy_tag, 'src', $newImg);
}
$content = str_replace($imgMatch, $newImg, $content);
}
}
}
}
else
{
$content = crawlomatic_lazy_loading_auto_fix($content);
}
if ((isset($crawlomatic_Main_Settings['strip_content_links']) && $crawlomatic_Main_Settings['strip_content_links'] == 'on') || $strip_links == '1') {
$content = crawlomatic_strip_links($content);
}
if ((isset($crawlomatic_Main_Settings['strip_internal_content_links']) && $crawlomatic_Main_Settings['strip_internal_content_links'] == 'on')) {
$content = crawlomatic_strip_internal_links($content, $url);
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$content = crawlomatic_replace_cyrilic($content);
$title = crawlomatic_replace_cyrilic($title);
}
if($limit_title_word_count != '' && is_numeric($limit_title_word_count))
{
$title = wp_trim_words($title, intval($limit_title_word_count), '');
}
if (isset($crawlomatic_Main_Settings['strip_scripts']) && $crawlomatic_Main_Settings['strip_scripts'] == 'on') {
$content1 = preg_replace('{<script[\s\S]*?\/\s?script>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{<ins.*?ins>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{<ins.*?>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{\(adsbygoogle.*?\);}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
}
$my_url = parse_url($url);
if(isset($my_url['host']))
{
$my_host = $my_url['host'];
}
else
{
$my_host = '';
}
preg_match_all('{src[\s]*=[\s]*["|\'](.*?)["|\']}is', $content , $matches);
$img_srcs = ($matches[1]);
$replaced_links_img = array();
foreach ($img_srcs as $img_src){
$original_src = $img_src;
$img_src_rel = crawlomatic_fix_single_link($img_src, $url);
if($img_src_rel != $img_src)
{
if(!in_array($img_src, $replaced_links_img))
{
$replaced_links_img[] = $img_src;
$content = str_replace($img_src, $img_src_rel, $content);
}
}
}
if (!isset($crawlomatic_Main_Settings['keep_srcset']) || $crawlomatic_Main_Settings['keep_srcset'] != 'on')
{
$content1 = preg_replace('{\ssrcset=".*?"}', ' ', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{\ssizes=".*?"}', ' ', $content);
if($content1 !== null)
{
$content = $content1;
}
}
$content = html_entity_decode($content, ENT_NOQUOTES | ENT_HTML5) ;
if ($get_css == '1') {
add_action('wp_enqueue_scripts', 'crawlomatic_wp_custom_css_files', 10, 2);
$htmlcontent = crawlomatic_get_web_page($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
if ($htmlcontent !== FALSE) {
preg_match_all('/"([^"]+?\.css)"/', $htmlcontent, $matches);
$matches = $matches[0];
$matches = array_unique($matches);
$cont = 0;
foreach ($matches as $match) {
$match = trim(htmlspecialchars_decode($match), '"');
if (!crawlomatic_url_exists($match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
$tmp_match = 'http:' . $match;
if (!crawlomatic_url_exists($tmp_match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
$parts = explode('/', $url);
$dir = '';
for ($i = 0; $i < count($parts) - 1; $i++) {
$dir .= $parts[$i] . "/";
}
$tmp_match = $dir . trim($match, '/');
if (!crawlomatic_url_exists($tmp_match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
continue;
} else {
$match = $tmp_match;
}
} else {
$match = $tmp_match;
}
}
$css_temp = crawlomatic_get_web_page($match, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
if ($css_temp === FALSE) {
continue;
}
$css_cont .= wp_strip_all_tags($css_temp) . ' ';
}
}
}
$description = crawlomatic_getExcerpt($content);
if($items[$iloop]['crawled_date'] === true)
{
$date = $items[$iloop]['date'];
}
else
{
$postdatex = gmdate("Y-m-d H:i:s", intval($init_date));
$date = $postdatex;
$init_date = $init_date - 1;
}
if($date_index != '')
{
$old_d = strtotime($date);
if($old_d !== false)
{
$newtime = $old_d + ($date_index * 60 * 60);
$date = date("Y-m-d H:i:s", $newtime);
}
}
$extra_categories = '';
if(is_array($items[$iloop]['categories']))
{
foreach ($items[$iloop]['categories'] as $category)
{
$extra_categories .= $category . ',';
}
$extra_categories = trim($extra_categories, ',');
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$extra_categories = crawlomatic_replace_cyrilic($extra_categories);
}
$my_post = array();
if(isset($items[$iloop]['ID']))
{
$my_post['ID'] = $items[$iloop]['ID'];
}
else
{
$my_post['ID'] = $post;
}
$my_post['attach_ids'] = array();
$my_post['crawlomatic_enable_pingbacks'] = $enable_pingback;
$my_post['post_type'] = $post_type;
$my_post['comment_status'] = $accept_comments;
if($user_name_type == 'rnd-crawlomatic')
{
$randid = crawlomatic_display_random_user();
if($randid === false)
{
$post_user_set = $dff_u;
}
else
{
$post_user_set = $randid->ID;
}
$my_post['post_author'] = $post_user_set;
}
elseif($user_name_type == 'feed-crawlomatic')
{
if($items[$iloop]['author'] != '')
{
if(username_exists( sanitize_user($items[$iloop]['author']) ))
{
$user_id_t = get_user_by('login', sanitize_user($items[$iloop]['author']));
if($user_id_t)
{
$post_user_set = $user_id_t->ID;
}
else
{
$post_user_set = $dff_u;
}
}
else
{
// The account created here is promoted to the editor role right after creation,
// so its throwaway password must not be guessable. The previous hand-rolled
// generator used rand() and only 8 characters; WordPress' own password
// generator is used instead, with a longer length and the extended symbol set.
$ppass = wp_generate_password(24, true, true);
// Create the user named after the crawled item's author, with a random e-mail address.
$curr_id = wp_create_user(sanitize_user($items[$iloop]['author']), $ppass, crawlomatic_generate_random_email());
if ( is_int($curr_id) )
{
$u = new WP_User($curr_id);
$u->remove_role('subscriber');
$u->add_role('editor');
$post_user_set = $curr_id;
update_user_meta($curr_id,'last_name', $items[$iloop]['author']);
}
else
{
$post_user_set = $dff_u;
}
}
}
else
{
$post_user_set = $dff_u;
}
$my_post['post_author'] = $post_user_set;
}
elseif($user_name_type == 'url-crawlomatic')
{
if($my_host != '')
{
$my_host = preg_replace('#^www\.(.+)#i', '$1', $my_host);
if(username_exists( sanitize_user($my_host) ))
{
$user_id_t = get_user_by('login', sanitize_user($my_host));
if($user_id_t)
{
$post_user_set = $user_id_t->ID;
}
else
{
$post_user_set = $dff_u;
}
}
else
{
// The account created here is promoted to the editor role right after creation,
// so its throwaway password must not be guessable. The previous hand-rolled
// generator used rand() and only 8 characters; WordPress' own password
// generator is used instead, with a longer length and the extended symbol set.
$ppass = wp_generate_password(24, true, true);
// Create the user named after the source host, with a random e-mail address.
$curr_id = wp_create_user(sanitize_user($my_host), $ppass, crawlomatic_generate_random_email());
if ( is_int($curr_id) )
{
$u = new WP_User($curr_id);
$u->remove_role('subscriber');
$u->add_role('editor');
$post_user_set = $curr_id;
update_user_meta($curr_id,'last_name', $my_host);
}
else
{
$post_user_set = $dff_u;
}
}
}
else
{
$post_user_set = $dff_u;
}
$my_post['post_author'] = $post_user_set;
}
else
{
$my_post['post_author'] = $post_user_name;
}
$item_tags = '';
if(is_array($items[$iloop]['tags']))
{
foreach ($items[$iloop]['tags'] as $xtag)
{
$item_tags .= $xtag . ',';
}
$item_tags = trim($item_tags, ',');
}
$item_download = array();
$my_post['post_gallery'] = $items[$iloop]['gallery'];
if(isset($items[$iloop]['download_remote']) && count($items[$iloop]['download_remote']) > 0)
{
$item_download = $items[$iloop]['download_remote'];
$my_post['download_local'] = $items[$iloop]['download_local'];
}
else
{
$my_post['download_local'] = array();
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$item_tags = crawlomatic_replace_cyrilic($item_tags);
}
if ($can_create_tag == '1') {
$my_post['tags_input'] = ($item_create_tag != '' ? $item_create_tag . ',' : '') . $item_tags;
} else if ($item_create_tag != '') {
$my_post['tags_input'] = $item_create_tag;
}
$orig_content = '';
$my_post['crawlomatic_post_url'] = $short_url;
$my_post['crawlomatic_post_orig_url'] = $url;
$my_post['crawlomatic_post_date'] = $date;
if($royalty_free == '1')
{
if (has_post_thumbnail( $my_post['ID'] ) )
{
$get_img = wp_get_attachment_image_src( get_post_thumbnail_id( $my_post['ID'] ), 'single-post-thumbnail' );
if($get_img === false)
{
$get_img = '';
}
else
{
if(is_array($get_img) && isset($get_img[0]))
{
$get_img = $get_img[0];
}
else
{
$get_img = '';
}
}
}
else
{
$get_img = '';
}
}
else
{
$get_img = $items[$iloop]['image'];
}
if($get_img != '')
{
$img_found = true;
$get_img = crawlomatic_fix_single_link($get_img, $url);
}
if(substr($get_img, 0, 2) === "//")
{
if(substr($url, 0, 5) === "https")
{
$get_img = 'https:' . $get_img;
}
else
{
$get_img = 'http:' . $get_img;
}
}
// For 'gallery' image types, run every configured gallery regex (one pattern
// per line) over the featured image URL. The replacement for pattern N is
// line N of $replace_gallery_regex, or an empty string when that line is missing.
if($image_type == 'gallery')
{
    if ($gallery_regex !== '')
    {
        // Split on CRLF, CR or LF. The escape sequences must be spelled out:
        // with literal line breaks inside the pattern, Windows line endings
        // would leave a stray carriage return attached to each rule.
        $xstrip_by_regex = preg_split('/\r\n|\r|\n/', $gallery_regex);
        $xreplace_regex = preg_split('/\r\n|\r|\n/', $replace_gallery_regex);
        $xcnt = 0;
        foreach($xstrip_by_regex as $sbr)
        {
            if(isset($xreplace_regex[$xcnt]))
            {
                $repreg = $xreplace_regex[$xcnt];
            }
            else
            {
                $repreg = '';
            }
            $xcnt++;
            $temp_cont_gallery = preg_replace("~" . $sbr . "~i", $repreg, $get_img);
            // preg_replace() returns NULL on error (e.g. an invalid user pattern);
            // keep the previous value in that case.
            if($temp_cont_gallery !== NULL)
            {
                $get_img = $temp_cont_gallery;
            }
        }
    }
}
if ($strip_by_id != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_id);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_id) {
$ret = $html_dom_original_html->find('*[id="'.trim($strip_id).'"]');
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_id) {
if(trim($strip_id) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByID($content, trim($strip_id));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($strip_by_class != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_class);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_class) {
if(trim($strip_class) == '')
{
continue;
}
$ret = $html_dom_original_html->find('*[class="'.trim($strip_class).'"]');
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_class) {
if(trim($strip_class) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByClass($content, trim($strip_class));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
// Remove from $content every element matched by the configured selectors
// (one selector per line in $strip_by_xpath).
if ($strip_by_xpath != '') {
    require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
    // Split on CRLF, CR or LF. The escape sequences must be spelled out:
    // with literal line breaks inside the pattern, Windows line endings
    // would leave a stray carriage return attached to each selector.
    $strip_by_xpath_arr = preg_split('/\r\n|\r|\n/', $strip_by_xpath);
    foreach($strip_by_xpath_arr as $fxx)
    {
        if(trim($fxx) == '')
        {
            // A blank line carries no selector, so there is nothing to strip.
            continue;
        }
        $extractok = false;
        // First attempt: query the DOM object returned by crawlomatic_str_get_html()
        // and blank out every node that matches.
        $html_dom_original_html = crawlomatic_str_get_html($content);
        if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
            $ret = $html_dom_original_html->find($fxx);
            foreach ($ret as $itm ) {
                $extractok = true;
                $itm->outertext = '' ;
            }
            $content = $html_dom_original_html->save();
            $html_dom_original_html->clear();
            unset($html_dom_original_html);
        }
        // Fallback: when nothing matched (or parsing failed), let
        // crawlomatic_removeTagByXPath() try; it signals failure with false.
        if($extractok == false)
        {
            $content_r = crawlomatic_removeTagByXPath($content, trim($fxx));
            if($content_r !== false)
            {
                $content = $content_r;
            }
        }
    }
}
// For every element matched by the configured selectors (one per line in
// $strip_html_by_xpath), keep its text but drop its HTML markup.
if ($strip_html_by_xpath != '') {
    // Split on CRLF, CR or LF. The escape sequences must be spelled out:
    // with literal line breaks inside the pattern, Windows line endings
    // would leave a stray carriage return attached to each selector.
    $strip_html_by_xpath_arr = preg_split('/\r\n|\r|\n/', $strip_html_by_xpath);
    require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
    foreach($strip_html_by_xpath_arr as $fx)
    {
        if(trim($fx) == '')
        {
            // A blank line carries no selector, so there is nothing to process.
            continue;
        }
        $extractok = false;
        // First attempt: query the DOM object returned by crawlomatic_str_get_html()
        // and replace each matching node with its tag-stripped text.
        $html_dom_original_html = crawlomatic_str_get_html($content);
        if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
            $ret = $html_dom_original_html->find($fx);
            foreach ($ret as $itm ) {
                $extractok = true;
                $itm->outertext = strip_tags($itm->outertext) ;
            }
            $content = $html_dom_original_html->save();
            $html_dom_original_html->clear();
            unset($html_dom_original_html);
        }
        // Fallback: when nothing matched (or parsing failed), let
        // crawlomatic_removeHTMLByXPath() try; it signals failure with false.
        if($extractok == false)
        {
            $content_r = crawlomatic_removeHTMLByXPath($content, trim($fx));
            if($content_r !== false)
            {
                $content = $content_r;
            }
        }
    }
}
if ($strip_by_tag != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_tag);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_tag) {
$strip_tag = trim($strip_tag);
if($strip_tag != '')
{
$ret = $html_dom_original_html->find($strip_tag);
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_tag) {
if(trim($strip_tag) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByTag($content, trim($strip_tag));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($only_text == '1') {
$content = crawlomatic_strip_html_tags($content, $allow_html_tags);
}
$content = crawlomatic_fix_links($content, $url);
$postdate = strtotime($date);
if($postdate !== FALSE)
{
$postdate = gmdate("Y-m-d H:i:s", intval($postdate));
}
if($postdate !== FALSE)
{
if($items[$iloop]['crawled_date'] === true)
{
$my_post['post_date_gmt'] = $postdate;
}
else
{
$my_post['post_date_gmt'] = $postdate;
}
}
if(isset($items[$iloop]['custom_shortcodes']) && is_array($items[$iloop]['custom_shortcodes']))
{
$custom_shortcodes_arr = $items[$iloop]['custom_shortcodes'];
}
else
{
$custom_shortcodes_arr = array();
}
if($postdate === false)
{
$postdate = $date;
}
if($content_percent != '' && is_numeric($content_percent) && $content_percent != 100)
{
$temp_t = crawlomatic_strip_html_tags($content);
$temp_t = str_replace(' ',"",$temp_t);
$ccount = str_word_count($temp_t);
if($ccount > 10)
{
$str_count = strlen($content);
$leave_cont = round($str_count * $content_percent / 100);
$content = crawlomatic_substr_close_tags($content, $leave_cont);
}
else
{
$ccount = crawlomatic_count_unicode_words($temp_t);
if($ccount > 10)
{
$str_count = strlen($content);
$leave_cont = round($str_count * $content_percent / 100);
$content = crawlomatic_substr_close_tags($content, $leave_cont);
}
}
}
$screenimageURL = '';
$screens_attach_id = '';
if(isset($items[$iloop]['screen_image']) && $items[$iloop]['screen_image'] != '')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
$screenimageURL = $items[$iloop]['screen_image'];
}
}
else
{
if (isset($crawlomatic_Main_Settings['headless_screen']) && $crawlomatic_Main_Settings['headless_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_comm .= '--proxy=' . trim($prx[$randomness]) . ' ';
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_comm .= '--proxy-auth=' . trim($prx_auth[$randomness]) . ' ';
}
}
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
$screenshotimg = crawlomatic_get_screenshot_PuppeteerAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', $request_delay, $scripter, $local_storage, $h, $w, $auto_captcha, $enable_adblock, $clickelement);
if($screenshotimg !== false)
{
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name . '.jpg';
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
$is_fail = $wp_filesystem->put_contents($screenimageName, $screenshotimg);
if($is_fail === false)
{
crawlomatic_log_to_file('Error in writing screenshot to file: ' . $screenimageName);
}
else
{
$wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment($attachment, $screenimageName);
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata($screens_attach_id, $screenimageName);
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
}
}
}
}
elseif (isset($crawlomatic_Main_Settings['phantom_screen']) && $crawlomatic_Main_Settings['phantom_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if(function_exists('shell' . '_exec'))
{
$disabled = explode(',', ini_get('disable_functions'));
if(!in_array('shell' . '_exec', $disabled))
{
if (isset($crawlomatic_Main_Settings['phantom_path']) && $crawlomatic_Main_Settings['phantom_path'] != '')
{
$phantomjs_comm = $crawlomatic_Main_Settings['phantom_path'] . ' ';
}
else
{
$phantomjs_comm = 'phantomjs ';
}
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name;
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_comm .= '--proxy=' . trim($prx[$randomness]) . ' ';
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_comm .= '--proxy-auth=' . trim($prx_auth[$randomness]) . ' ';
}
}
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
// Build the PhantomJS screenshot command, run it, and register the resulting
// image as an attachment.
$shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
// Every argument is passed through escapeshellarg(). The crawled URL, cookies,
// user agent and injected script were previously only wrapped in double quotes,
// which lets `$(...)`, backticks or an embedded quote break out of the argument
// and run arbitrary shell commands.
// NOTE(review): the script/local-storage values are now passed verbatim instead
// of through addslashes(); confirm phantom-screenshot.js does not expect the
// backslash-escaped form.
$screenshot_args = array(
    dirname(__FILE__) . '/res/phantomjs/phantom-screenshot.js',
    dirname(__FILE__),
    $url,
    $screenimageName,
    $w,
    $h,
    $custom_user_agent,
    $custom_cookies,
    $user_pass,
    $scripter,
    $local_storage
);
// $phantomjs_comm already holds the binary (and optional proxy flags) followed by a space.
$screenshot_cmd = $phantomjs_comm;
foreach($screenshot_args as $screenshot_arg)
{
    $screenshot_cmd .= escapeshellarg((string) $screenshot_arg) . ' ';
}
$cmdResult = $shefunc($screenshot_cmd . '2>&1');
if($cmdResult === NULL || $cmdResult == '' || trim($cmdResult) === 'timeout' || stristr($cmdResult, 'sh: phantomjs: command not found') !== false)
{
    // No usable output: drop the screenshot URL and log the exact command that was run.
    $screenimageURL = '';
    crawlomatic_log_to_file('Error in phantomjs screenshot: exec: ' . $screenshot_cmd . ', reterr: ' . $cmdResult);
}
else
{
    // NOTE(review): existence is checked on the path without the '.jpg' suffix,
    // while the attachment below is registered with it — confirm which file(s)
    // phantom-screenshot.js actually writes.
    if($wp_filesystem->exists($screenimageName))
    {
        $wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
            'post_content' => '',
            'post_status' => 'inherit'
        );
        $screens_attach_id = wp_insert_attachment( $attachment, $screenimageName . '.jpg' );
        require_once( ABSPATH . 'wp-admin/includes/image.php' );
        require_once( ABSPATH . 'wp-admin/includes/media.php' );
        $attach_data = wp_generate_attachment_metadata( $screens_attach_id, $screenimageName . '.jpg' );
        wp_update_attachment_metadata( $screens_attach_id, $attach_data );
    }
    else
    {
        crawlomatic_log_to_file('Error in phantomjs screenshot not found: exec: ' . $screenshot_cmd . ', reterr: ' . $cmdResult);
    }
}
}
}
}
}
elseif (isset($crawlomatic_Main_Settings['puppeteer_screen']) && $crawlomatic_Main_Settings['puppeteer_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if(function_exists('shell' . '_exec'))
{
$disabled = explode(',', ini_get('disable_functions'));
if(!in_array('shell' . '_exec', $disabled))
{
$phantomjs_comm = 'node ';
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
{
$phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
}
else
{
$phantomjs_timeout = 'default';
}
if ($w < 350) {
$w = 350;
}
if ($w > 1920) {
$w = 1920;
}
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name . '.jpg';
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
$phantomjs_proxcomm = '"null"';
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_proxcomm = '"' . trim($prx[$randomness]);
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_proxcomm .= '~~~' . trim($prx_auth[$randomness]);
}
}
$phantomjs_proxcomm .= '"';
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
// Build the Puppeteer (node) screenshot command, run it, and register the
// resulting image as an attachment.
$shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
// Every value below is passed through escapeshellarg(). The crawled URL, cookies,
// user agent and injected script were previously only wrapped in double quotes,
// which lets `$(...)`, backticks or an embedded quote break out of the argument
// and run arbitrary shell commands.
// NOTE(review): the script/local-storage values are now passed verbatim instead
// of through addslashes(); confirm screenshot.js does not expect the
// backslash-escaped form.
$screenshot_cmd = $phantomjs_comm . escapeshellarg(dirname(__FILE__) . '/res/puppeteer/screenshot.js');
foreach(array($url, $screenimageName, $w, $h) as $screenshot_arg)
{
    $screenshot_cmd .= ' ' . escapeshellarg((string) $screenshot_arg);
}
// The proxy argument is already quoted where it is assembled, so it is appended as-is.
$screenshot_cmd .= ' ' . $phantomjs_proxcomm;
foreach(array($custom_user_agent, $custom_cookies, $user_pass, $phantomjs_timeout, $scripter, $local_storage) as $screenshot_arg)
{
    $screenshot_cmd .= ' ' . escapeshellarg((string) $screenshot_arg);
}
$cmdResult = $shefunc($screenshot_cmd . ' 2>&1');
// shell_exec() may return NULL/false; cast so stristr() always receives a string.
if(stristr((string) $cmdResult, 'sh: node: command not found') !== false || stristr((string) $cmdResult, 'throw err;') !== false)
{
    // node is missing or the script crashed: drop the screenshot URL and log the command.
    $screenimageURL = '';
    crawlomatic_log_to_file('Error in puppeteer screenshot: exec: ' . $screenshot_cmd . ', reterr: ' . $cmdResult);
}
else
{
    if($wp_filesystem->exists($screenimageName))
    {
        $wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
            'post_content' => '',
            'post_status' => 'inherit'
        );
        $screens_attach_id = wp_insert_attachment( $attachment, $screenimageName);
        require_once( ABSPATH . 'wp-admin/includes/image.php' );
        require_once( ABSPATH . 'wp-admin/includes/media.php' );
        $attach_data = wp_generate_attachment_metadata( $screens_attach_id, $screenimageName);
        wp_update_attachment_metadata( $screens_attach_id, $attach_data );
    }
    else
    {
        crawlomatic_log_to_file('Error in puppeteer screenshot not found: exec: ' . $screenshot_cmd . ', reterr: ' . $cmdResult);
    }
}
}
}
}
}
}
if (isset($crawlomatic_Main_Settings['disable_excerpt']) && $crawlomatic_Main_Settings['disable_excerpt'] == "on")
{
$my_post['post_excerpt'] = '';
}
else
{
if(isset($items[$iloop]['excerpt']) && trim($items[$iloop]['excerpt']) !== '')
{
$arr = crawlomatic_spin_and_translate('test', $items[$iloop]['excerpt'], $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$new_post_excerpt = $arr[1];
if(trim($replace_words) != '')
{
$replace_arr = explode(',', trim($replace_words));
$replace_arr = array_map('trim', $replace_arr);
foreach($replace_arr as $rex)
{
$repla_parts = explode('|', $rex);
if(!isset($repla_parts[1]))
{
continue;
}
$new_post_excerpt = str_replace($repla_parts[0], $repla_parts[1], $new_post_excerpt);
}
}
$my_post['post_excerpt'] = trim($new_post_excerpt);
$description = trim($new_post_excerpt);
}
}
if ($limit_word_count !== "") {
$content = crawlomatic_custom_wp_trim_excerpt($content, $limit_word_count, $short_url, $read_more);
}
if (isset($crawlomatic_Main_Settings['only_imported']) && $crawlomatic_Main_Settings['only_imported'] == 'on')
{
$arr = crawlomatic_spin_and_translate($title, $content, $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$title = $arr[0];
$content = $arr[1];
}
$new_post_content = crawlomatic_replaceContentShortcodes($post_content, $title, $content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
$new_post_title = crawlomatic_replaceContentShortcodes($post_title, $title, $content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(trim($replace_words) != '')
{
$replace_arr = explode(',', trim($replace_words));
$replace_arr = array_map('trim', $replace_arr);
foreach($replace_arr as $rex)
{
$repla_parts = explode('|', $rex);
if(!isset($repla_parts[1]))
{
continue;
}
$new_post_content = str_replace($repla_parts[0], $repla_parts[1], $new_post_content);
$new_post_title = str_replace($repla_parts[0], $repla_parts[1], $new_post_title);
}
}
// Apply the "strip by regex" rules (one pattern per line, replacement taken from
// the same line of $replace_regex) to the generated post content. Skipped when
// $run_raw_html is '1' (presumably the rules are then applied to the raw HTML
// elsewhere — confirm).
if($run_raw_html != '1')
{
    if ($strip_by_regex !== '')
    {
        // Split on CRLF, CR or LF. The escape sequences must be spelled out:
        // with literal line breaks inside the pattern, Windows line endings
        // would leave a stray carriage return attached to each rule.
        $xstrip_by_regex = preg_split('/\r\n|\r|\n/', $strip_by_regex);
        $xreplace_regex = preg_split('/\r\n|\r|\n/', $replace_regex);
        $xcnt = 0;
        $need_to_cont = false;
        foreach($xstrip_by_regex as $sbr)
        {
            if(isset($xreplace_regex[$xcnt]))
            {
                $repreg = $xreplace_regex[$xcnt];
            }
            else
            {
                $repreg = '';
            }
            $xcnt++;
            $temp_cont = preg_replace("~" . $sbr . "~i", $repreg, $new_post_content);
            // The result used to be computed and then discarded, so the rules had
            // no effect on the post content. Store it back, keeping the previous
            // value when preg_replace() reports an error (NULL), as the other
            // regex loops in this function do.
            if($temp_cont !== NULL)
            {
                $new_post_content = $temp_cont;
            }
        }
    }
}
if (isset($crawlomatic_Main_Settings['strip_links']) && $crawlomatic_Main_Settings['strip_links'] == 'on') {
$new_post_content = crawlomatic_strip_links($new_post_content);
}
$my_post['screen_attach'] = $screens_attach_id;
$my_post['extra_categories'] = $extra_categories;
$my_post['extra_tags'] = $item_tags;
if (!isset($crawlomatic_Main_Settings['only_imported']) || $crawlomatic_Main_Settings['only_imported'] != 'on')
{
$arr = crawlomatic_spin_and_translate($new_post_title, $new_post_content, $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$new_post_title = $arr[0];
$new_post_content = $arr[1];
}
$new_post_title = html_entity_decode($new_post_title);
$new_post_content = html_entity_decode($new_post_content);
$title_count = -1;
if (isset($crawlomatic_Main_Settings['replace_url']) && $crawlomatic_Main_Settings['replace_url'] !== '') {
if(strstr($crawlomatic_Main_Settings['replace_url'], '%%original_url%%') !== false)
{
$repl = str_replace('%%original_url%%', '', $crawlomatic_Main_Settings['replace_url']);
$new_post_content1 = preg_replace('/<a(.+?)href=["\']([^"\']+?)["\']([^>]*?)>/i','<a$1href="$2' . esc_html($repl) . '"$3>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
else
{
$new_post_content1 = preg_replace('/<a(.+?)href=["\']([^"\']+?)["\']([^>]*?)>/i','<a$1href="' . esc_url($crawlomatic_Main_Settings['replace_url']) . '"$3>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
}
if ($strip_images == '1') {
$new_post_content = crawlomatic_strip_images($new_post_content);
}
$copy_str = '';
$also_imgs = false;
if($copy_types != '')
{
$xcopy_types = explode(',', $copy_types);
$xcopy_types = array_map('trim', $xcopy_types);
foreach($xcopy_types as $cpa)
{
$copy_str .= trim($cpa, '.') . '|';
}
$copy_str = trim($copy_str, '|');
}
if ($copy_images == '1' || (isset($crawlomatic_Main_Settings['copy_images']) && $crawlomatic_Main_Settings['copy_images'] == 'on'))
{
$also_imgs = true;
if($copy_str == '')
{
$copy_str = 'jpg|jpeg|png|gif|jpe|tif|tiff|svg|ico|webp';
}
else
{
$copy_str .= '|jpg|jpeg|png|gif|jpe|tif|tiff|svg|ico|webp';
}
}
// Copy every remote file whose URL ends in one of the configured extensions
// ($copy_str is a '|'-separated list) to the local site and rewrite the post
// content to reference the local copy.
if($copy_str != '')
{
    if($also_imgs == true)
    {
        // Remove srcset attributes from the content before rewriting URLs.
        $new_post_content1 = preg_replace("~\ssrcset=['\"](?:[^'\"]*)['\"]~i", ' ', $new_post_content);
        if($new_post_content1 !== null)
        {
            $new_post_content = $new_post_content1;
        }
    }
    preg_match_all('/(?:http|https|ftp|ftps)?:\/\/\S+\.(?:' . $copy_str . ')/', $new_post_content, $matches);
    if(isset($matches[0][0]))
    {
        // Each distinct URL is downloaded once.
        $matches[0] = array_unique($matches[0]);
        foreach($matches[0] as $match)
        {
            $match = crawlomatic_fix_single_link($match, $url);
            $att_id = '';
            $file_path = crawlomatic_copy_image_locally($match, $use_proxy, $request_delay, $custom_user_agent, $user_pass, $custom_cookies, $att_id);
            if($file_path != false)
            {
                // Normalise Windows path separators. The backslash has to be
                // written as '\\': a lone '\' escapes the closing quote and
                // makes the file unparsable.
                $file_path = str_replace('\\', '/', $file_path);
                $new_post_content = str_replace($match, $file_path, $new_post_content);
                // Remember the attachment id (when one was set) on the post data.
                if($att_id != '')
                {
                    $my_post['attach_ids'][] = $att_id;
                }
            }
        }
    }
}
// Copy every remote file whose URL is matched by the user supplied $copy_regex
// to the local site and rewrite the post content to reference the local copy.
if($copy_regex != '')
{
    preg_match_all($copy_regex, $new_post_content, $matches);
    if(isset($matches[0][0]))
    {
        // Each distinct URL is downloaded once.
        $matches[0] = array_unique($matches[0]);
        foreach($matches[0] as $match)
        {
            $match = crawlomatic_fix_single_link($match, $url);
            $att_id = '';
            $file_path = crawlomatic_copy_image_locally($match, $use_proxy, $request_delay, $custom_user_agent, $user_pass, $custom_cookies, $att_id);
            if($file_path != false)
            {
                // Normalise Windows path separators. The backslash has to be
                // written as '\\': a lone '\' escapes the closing quote and
                // makes the file unparsable.
                $file_path = str_replace('\\', '/', $file_path);
                $new_post_content = str_replace($match, $file_path, $new_post_content);
                // Remember the attachment id (when one was set) on the post data.
                if($att_id != '')
                {
                    $my_post['attach_ids'][] = $att_id;
                }
            }
        }
    }
}
if ((isset($crawlomatic_Main_Settings['link_attributes_internal']) && $crawlomatic_Main_Settings['link_attributes_internal'] !== '') || (isset($crawlomatic_Main_Settings['link_attributes_external']) && $crawlomatic_Main_Settings['link_attributes_external'] !== ''))
{
$new_post_content = crawlomatic_add_link_tags($new_post_content);
}
if (isset($crawlomatic_Main_Settings['iframe_resize_width']) && $crawlomatic_Main_Settings['iframe_resize_width'] !== '')
{
$new_post_content1 = preg_replace("~<iframe(.*?)(?:width=[\"\'](?:\d*?)[\"\'])?(.*?)>~i", '<iframe$1 width="' . esc_attr($crawlomatic_Main_Settings['iframe_resize_width']) . '"$2>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
if (isset($crawlomatic_Main_Settings['iframe_resize_height']) && $crawlomatic_Main_Settings['iframe_resize_height'] !== '')
{
$new_post_content1 = preg_replace("~<iframe(.*?)(?:height=[\"\'](?:\d*?)[\"\'])?(.*?)>~i", '<iframe$1 height="' . esc_attr($crawlomatic_Main_Settings['iframe_resize_height']) . '"$2>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
// When $regex_image is '1', also run the "strip by regex" rules (one pattern per
// line, replacement taken from the same line of $replace_regex) over the
// featured image URL.
if($regex_image == '1')
{
    if ($strip_by_regex !== '')
    {
        // Split on CRLF, CR or LF. The escape sequences must be spelled out:
        // with literal line breaks inside the pattern, Windows line endings
        // would leave a stray carriage return attached to each rule.
        $xstrip_by_regex = preg_split('/\r\n|\r|\n/', $strip_by_regex);
        $xreplace_regex = preg_split('/\r\n|\r|\n/', $replace_regex);
        $xcnt = 0;
        foreach($xstrip_by_regex as $sbr)
        {
            if(isset($xreplace_regex[$xcnt]))
            {
                $repreg = $xreplace_regex[$xcnt];
            }
            else
            {
                $repreg = '';
            }
            $xcnt++;
            $temp_cont = preg_replace("~" . $sbr . "~i", $repreg, $get_img);
            // preg_replace() returns NULL on error; keep the previous value then.
            if($temp_cont !== NULL)
            {
                $get_img = $temp_cont;
            }
        }
    }
}
// Swap the featured image for a configured substitute. $featured_replacer holds
// one "original=>replacement" rule per line; a rule applies when its left side
// equals the current image URL.
if($featured_replacer != '' && !empty($get_img))
{
    // Split on CRLF, CR or LF. The escape sequences must be spelled out:
    // with literal line breaks inside the pattern, Windows line endings
    // would leave a stray carriage return attached to each rule.
    $xfeatured_replacer = preg_split('/\r\n|\r|\n/', $featured_replacer);
    foreach($xfeatured_replacer as $featured_img_repl)
    {
        $repl_parts = explode('=>', $featured_img_repl);
        // Lines without a '=>' separator are ignored.
        if(isset($repl_parts[1]))
        {
            if(trim($repl_parts[0]) == $get_img)
            {
                if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                    crawlomatic_log_to_file('Replacing featured image "' . esc_html(trim($repl_parts[0])) . '" with new image: "' . esc_html(trim($repl_parts[1])) . '"');
                }
                $get_img = trim($repl_parts[1]);
            }
        }
    }
}
$my_post['crawlomatic_post_image'] = $get_img;
// Apply the title regex rules (one pattern per line in $strip_by_regex_title,
// replacement taken from the same line of $replace_regex_title) to the post title.
if ($strip_by_regex_title !== '')
{
    // Split on CRLF, CR or LF. The escape sequences must be spelled out:
    // with literal line breaks inside the pattern, Windows line endings
    // would leave a stray carriage return attached to each rule.
    $xstrip_by_regex = preg_split('/\r\n|\r|\n/', $strip_by_regex_title);
    $xreplace_regex = preg_split('/\r\n|\r|\n/', $replace_regex_title);
    $xcnt = 0;
    foreach($xstrip_by_regex as $sbr)
    {
        if(isset($xreplace_regex[$xcnt]))
        {
            $repreg = $xreplace_regex[$xcnt];
        }
        else
        {
            $repreg = '';
        }
        $xcnt++;
        $temp_cont_title = preg_replace("~" . $sbr . "~i", $repreg, $new_post_title);
        // preg_replace() returns NULL on error; keep the previous title then.
        if($temp_cont_title !== NULL)
        {
            $new_post_title = $temp_cont_title;
        }
    }
}
$exc_cont = $content;
if ($strip_by_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $strip_by_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_contx = preg_replace("~" . $sbr . "~i", $repreg, $exc_cont);
if($temp_contx !== NULL)
{
$exc_cont = $temp_contx;
}
}
}
$new_post_content = str_replace('</ iframe>', '</iframe>', $new_post_content);
if ($keep_source == '1')
{
$new_post_content1 = preg_replace('{"https:\/\/translate.google.com\/translate\?hl=(?:.*?)&prev=_t&sl=(?:.*?)&tl=(?:.*?)&u=([^"]*?)"}i', "$1", urldecode($new_post_content));
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
if (isset($crawlomatic_Main_Settings['fix_html']) && $crawlomatic_Main_Settings['fix_html'] == "on")
{
$new_post_content = crawlomatic_repairHTML($new_post_content);
if (isset($crawlomatic_Main_Settings['alt_read']) && $crawlomatic_Main_Settings['alt_read'] == "on")
{
$new_post_content = str_replace('<html><body>', '', $new_post_content);
$new_post_content = str_replace('</body></html>', '', $new_post_content);
$new_post_content = str_replace('<a ', ' <a ', $new_post_content);
}
}
if (isset($crawlomatic_Main_Settings['strip_html']) && $crawlomatic_Main_Settings['strip_html'] == 'on') {
$new_post_content = crawlomatic_strip_html_tags_nl($new_post_content);
}
if (!isset($crawlomatic_Main_Settings['disable_excerpt']) || $crawlomatic_Main_Settings['disable_excerpt'] != "on")
{
if(!isset($items[$iloop]['excerpt']) || trim($items[$iloop]['excerpt']) == '')
{
if ($translate != "disabled" && $translate != "en") {
$my_post['post_excerpt'] = crawlomatic_getExcerpt($new_post_content);
} else {
$my_post['post_excerpt'] = crawlomatic_getExcerpt($exc_cont);
}
}
}
$my_post['post_content'] = trim($new_post_content);
$my_post['auto_delete'] = '';
if ($auto_delete !== "") {
$auto_delete = trim($auto_delete, ' "');
$del_time = strtotime($auto_delete);
if($del_time !== false)
{
$my_post['auto_delete'] = $del_time;
}
}
$my_post['post_title'] = $new_post_title;
$my_post['original_title'] = $title;
$my_post['original_content'] = $content;
$my_post['crawlomatic_timestamp'] = crawlomatic_get_date_now();
$my_post['crawlomatic_post_format'] = $post_format;
if ($enable_pingback == '1') {
$my_post['ping_status'] = 'open';
} else {
$my_post['ping_status'] = 'closed';
}
if($parent_id != '')
{
$my_post['post_parent'] = intval($parent_id);
}
$custom_arr = array();
if($custom_fields != '')
{
if(stristr($custom_fields, '=>') != false)
{
$rule_arr = explode(',', trim($custom_fields));
foreach($rule_arr as $rule)
{
$my_args = explode('=>', trim($rule));
if(isset($my_args[1]))
{
if(isset($my_args[2]))
{
$req_list = explode(',', $my_args[2]);
$required_found = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_content, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_content, trim($rl)) === false)
{
$required_found = true;
break;
}
}
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_title, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_title, trim($rl)) === false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if(isset($my_args[3]))
{
$my_args[1] = $my_args[3];
}
else
{
continue;
}
}
}
$custom_field_content = trim($my_args[1]);
$custom_field_content = crawlomatic_replaceContentShortcodes($custom_field_content, $new_post_title, $new_post_content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(stristr($my_args[0], '[') !== false && stristr($my_args[0], ']') !== false)
{
preg_match_all('#([^\[\]]*?)\[([^\[\]]*?)\]#', $my_args[0], $cfm);
if(isset($cfm[2][0]))
{
if(isset($custom_arr[trim($cfm[1][0])]) && is_array($custom_arr[trim($cfm[1][0])]))
{
$custom_arr[trim($cfm[1][0])] = array_merge($custom_arr[trim($cfm[1][0])], array(trim($cfm[2][0]) => $custom_field_content));
}
else
{
$custom_arr[trim($cfm[1][0])] = array(trim($cfm[2][0]) => $custom_field_content);
}
}
else
{
$custom_arr[trim($my_args[0])] = $custom_field_content;
}
}
else
{
$custom_arr[trim($my_args[0])] = $custom_field_content;
}
}
}
}
}
if($woo_active && ($post_type == 'product' || $post_type == 'product_variation'))
{
if(strstr($custom_fields, '_price') === false)
{
$custom_arr['_price'] = $item_price_multi;
}
if(strstr($custom_fields, '_sale_price') === false)
{
$custom_arr['_sale_price'] = $item_price_multi;
}
if(strstr($custom_fields, '_regular_price') === false)
{
if(!empty($item_regular_price_multi) && $item_regular_price_multi !== 0)
{
$custom_arr['_regular_price'] = $item_regular_price_multi;
}
else
{
$custom_arr['_regular_price'] = $item_price_multi;
}
}
if(strstr($custom_fields, '_visibility') === false)
{
$custom_arr['_visibility'] = 'visible';
}
if(strstr($custom_fields, '_manage_stock') === false)
{
$custom_arr['_manage_stock'] = 'no';
}
if(strstr($custom_fields, '_stock_status') === false)
{
$custom_arr['_stock_status'] = 'instock';
}
if(strstr($custom_fields, '_sku') === false)
{
$custom_arr['_sku'] = crawlomatic_generate_random_string(10);
}
}
$custom_tax_arr = array();
if($custom_tax != '')
{
if(stristr($custom_tax, '=>') != false)
{
$rule_arr = explode(';', trim($custom_tax));
foreach($rule_arr as $rule)
{
$my_args = explode('=>', trim($rule));
if(isset($my_args[1]))
{
if(isset($my_args[2]))
{
$req_list = explode(',', $my_args[2]);
$required_found = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_content, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_content, trim($rl)) === false)
{
$required_found = true;
break;
}
}
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_title, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_title, trim($rl)) === false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if(isset($my_args[3]))
{
$my_args[1] = $my_args[3];
}
else
{
continue;
}
}
}
$custom_tax_content = trim($my_args[1]);
$custom_tax_content = crawlomatic_replaceContentShortcodes($custom_tax_content, $new_post_title, $new_post_content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(substr(trim($my_args[0]), 0, 3) === "pa_" && $post_type == 'product' && !empty($custom_tax_content))
{
if(isset($custom_arr['_product_attributes']))
{
$custom_arr['_product_attributes'] = array_merge($custom_arr['_product_attributes'], array(trim($my_args[0]) =>array(
'name' => trim($my_args[0]),
'value' => $custom_tax_content,
'is_visible' => '1',
'is_taxonomy' => '1'
)));
}
else
{
$custom_arr['_product_attributes'] = array(trim($my_args[0]) =>array(
'name' => trim($my_args[0]),
'value' => $custom_tax_content,
'is_visible' => '1',
'is_taxonomy' => '1'
));
}
}
if(isset($custom_tax_arr[trim($my_args[0])]))
{
$custom_tax_arr[trim($my_args[0])] .= ',' . $custom_tax_content;
}
else
{
$custom_tax_arr[trim($my_args[0])] = $custom_tax_content;
}
}
}
}
}
if(count($custom_tax_arr) > 0)
{
$my_post['taxo_input'] = $custom_tax_arr;
}
$my_post['meta_input'] = $custom_arr;
if($my_post['post_content'] === '' && $my_post['post_title'] === '')
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Skipping post from updating, blank content and title: ' . $my_post['ID']);
}
continue;
}
if (isset($crawlomatic_Main_Settings['up_publish_date']) && $crawlomatic_Main_Settings['up_publish_date'] == 'on')
{
$my_post['post_date_gmt'] = gmdate("Y-m-d H:i:s", time());
$my_post['post_date'] = date("Y-m-d H:i:s", time());
}
if (isset($crawlomatic_Main_Settings['cleanup_not_printable']) && $crawlomatic_Main_Settings['cleanup_not_printable'] == 'on')
{
$pxca = preg_replace('/[-]/u', '', $my_post['post_content']);
if($pxca !== null)
{
$my_post['post_content'] = $pxca;
}
$pxta = preg_replace('/[-]/u', '', $my_post['post_title']);
if($pxta !== null)
{
$my_post['post_title'] = $pxta;
}
}
if(isset($items[$iloop]['variant_parent']))
{
$my_post['post_parent'] = $items[$iloop]['variant_parent'];
if($my_post['post_type'] == 'product')
{
$my_post['post_type'] = 'product_variation';
}
}
if (!isset($crawlomatic_Main_Settings['keep_filters']) || $crawlomatic_Main_Settings['keep_filters'] != 'on')
{
remove_filter('content_save_pre', 'wp_filter_post_kses');
remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');remove_filter('title_save_pre', 'wp_filter_kses');
}
$post_id = wp_update_post($my_post, true);
if (!isset($crawlomatic_Main_Settings['keep_filters']) || $crawlomatic_Main_Settings['keep_filters'] != 'on')
{
add_filter('content_save_pre', 'wp_filter_post_kses');
add_filter('content_filtered_save_pre', 'wp_filter_post_kses');add_filter('title_save_pre', 'wp_filter_kses');
}
if (!is_wp_error($post_id))
{
if($post_id === 0)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Post not updated, returned error: ' . $my_post['ID']);
}
continue;
}
$updated++;
if($wpml_lang != '' && function_exists('pll_set_post_language'))
{
pll_set_post_language($post_id, $wpml_lang);
}
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] Updated ID: ' . $post_id . ' successfully!');
}
if(isset($items[$iloop]['variant_parent']))
{
if(class_exists('WC_Product_Variation') && $post_type == 'product')
{
$pvariation = new WC_Product_Variation( $post_id );
$pvariation->set_sale_price( $item_price_multi );
$pvariation->set_price( $item_price_multi );
if(!empty($item_regular_price_multi) && $item_regular_price_multi !== 0)
{
$pvariation->set_regular_price( $item_regular_price_multi );
}
else
{
$pvariation->set_regular_price( $item_price_multi );
}
$pvariation->set_manage_stock(false);
if(isset($items[$iloop]['extrainfo']['weight']))
{
$pvariation->set_weight($items[$iloop]['extrainfo']['weight']);
}
if(isset($items[$iloop]['extrainfo']['length']))
{
$pvariation->set_length($items[$iloop]['extrainfo']['length']);
}
if(isset($items[$iloop]['extrainfo']['width']))
{
$pvariation->set_width($items[$iloop]['extrainfo']['width']);
}
if(isset($items[$iloop]['extrainfo']['height']))
{
$pvariation->set_height($items[$iloop]['extrainfo']['height']);
}
if(isset($items[$iloop]['extrainfo']['is_downloadable']))
{
$pvariation->set_downloadable($items[$iloop]['extrainfo']['is_downloadable']);
}
if(isset($items[$iloop]['extrainfo']['is_in_stock']))
{
$pvariation->set_stock_status($items[$iloop]['extrainfo']['is_in_stock']);
}
if(isset($items[$iloop]['extrainfo']['is_virtual']))
{
$pvariation->set_virtual($items[$iloop]['extrainfo']['is_virtual']);
}
$pvariation->save();
}
}
$variant_names = '';
if(isset($items[$iloop]['variants']))
{
$pvariations = get_children(array(
'post_parent' => $my_post['ID'],
'post_type' => 'product_variation',
'fields' => 'ids'
));
if(!empty($variants_label))
{
$attr_label = $variants_label;
}
else
{
$attr_label = esc_html__('Variants', 'crawlomatic-multipage-scraper-post-generator');
}
$attr_slug = sanitize_title($attr_label);
foreach($items[$iloop]['variants'] as $varpost)
{
$found_posted = false;
if (!empty($pvariations)) {
foreach ($pvariations as $vindex => $variation_id) {
$zm = get_post_meta( $variation_id, 'attribute_' . $attr_slug, true );
if(!empty($zm) && $zm == $varpost['variant_name'])
{
crawlomatic_log_to_file('Found variation ID for updating: ' . $variation_id);
$varpost['ID'] = $variation_id;
$varpost['post_parent'] = $post_id;
$found_posted = true;
unset($pvariations[$vindex]);
break;
}
}
}
if(!$found_posted)
{
$varpost['post_parent'] = $post_id;
}
if(isset($varpost['variant_name']))
{
$variant_names .= ' ' . trim($varpost['variant_name']) . ' |';
}
$varpost['variant_parent'] = $post_id;
$items[] = $varpost;
}
if (!empty($pvariations)) {
foreach ($pvariations as $variation_id) {
crawlomatic_log_to_file('Deleting product variation, as it was no longer found: ' . print_r($variation_id, true));
wp_delete_post($variation_id, true);
}
}
$variant_names = rtrim(trim($variant_names, '|'));
if($post_type == 'product')
{
wp_set_object_terms( $post_id, 'variable', 'product_type' );
$attributes_array = get_post_meta( $post_id, '_product_attributes', true);
if(!is_array($attributes_array))
{
$attributes_array = array();
}
if(!empty($variants_label))
{
$attr_label = $variants_label;
}
else
{
$attr_label = esc_html__('Variants', 'crawlomatic-multipage-scraper-post-generator');
}
$attr_slug = sanitize_title($attr_label);
$attributes_array[$attr_slug] = array(
'name' => $attr_label,
'value' => trim($variant_names),
'is_visible' => '1',
'is_variation' => '1',
'is_taxonomy' => '0'
);
update_post_meta( $post_id, '_product_attributes', $attributes_array );
}
}
if(isset($items[$iloop]['variant_name']))
{
if(!empty($variants_label))
{
$attr_label = $variants_label;
}
else
{
$attr_label = esc_html__('Variants', 'crawlomatic-multipage-scraper-post-generator');
}
$attr_slug = sanitize_title($attr_label);
update_post_meta( $post_id, 'attribute_' . $attr_slug, trim($items[$iloop]['variant_name']) );
}
if(count($my_post['download_local']) > 0)
{
foreach($my_post['download_local'] as $more_dl)
{
$wp_filetype = wp_check_filetype( $more_dl, null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => 'Downloaded file for post ID ' . $post_id,
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment($attachment, $more_dl, $post_id);
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata($screens_attach_id, $more_dl);
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
if($post_type == 'product' && class_exists('WC_Product_Download'))
{
$file_url = wp_get_attachment_url( $screens_attach_id );
$download_id = md5( $file_url );
$file_name = $my_post['post_title'];
$pd_object = new WC_Product_Download();
$pd_object->set_id( $download_id );
$pd_object->set_name( $file_name );
$pd_object->set_file( $file_url );
$product = wc_get_product( $post_id );
if($product !== null)
{
$downloads = $product->get_downloads();
$downloads[$download_id] = $pd_object;
$product->set_downloads($downloads);
$product->save();
}
}
}
}
if(isset($my_post['taxo_input']) && count($my_post['taxo_input']) > 0)
{
foreach($my_post['taxo_input'] as $taxn => $taxval)
{
$taxn = trim($taxn);
$taxval = trim($taxval);
if(is_taxonomy_hierarchical($taxn))
{
$taxval = array_map('trim', explode(',', $taxval));
for($ii = 0; $ii < count($taxval); $ii++)
{
if(!is_numeric($taxval[$ii]))
{
$xtermid = get_term_by('name', $taxval[$ii], $taxn);
if($xtermid !== false)
{
$taxval[$ii] = intval($xtermid->term_id);
}
else
{
wp_insert_term( $taxval[$ii], $taxn);
$xtermid = get_term_by('name', $taxval[$ii], $taxn);
if($xtermid !== false)
{
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
pll_set_term_language($xtermid->term_id, $wpml_lang);
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', $taxn );
$pars['element_id'] = $xtermid->term_id;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
$taxval[$ii] = intval($xtermid->term_id);
}
}
}
}
wp_set_post_terms($post_id, $taxval, $taxn, true);
}
else
{
wp_set_post_terms($post_id, trim($taxval), $taxn, true);
}
}
}
if (isset($my_post['crawlomatic_post_format']) && $my_post['crawlomatic_post_format'] != '' && $my_post['crawlomatic_post_format'] != 'post-format-standard') {
wp_set_post_terms($post_id, $my_post['crawlomatic_post_format'], 'post_format', false);
}
if($my_post['screen_attach'] != '')
{
$media_post = wp_update_post( array(
'ID' => $my_post['screen_attach'],
'post_parent' => $post_id,
), true );
if( is_wp_error( $media_post ) ) {
crawlomatic_log_to_file( 'Failed to assign post attachment ' . $my_post['screen_attach'] . ' to post id ' . $post_id . ': ' . print_r( $media_post, 1 ) );
}
}
$featured_path = '';
$image_failed = false;
if (!isset($crawlomatic_Main_Settings['no_up_img']) || $crawlomatic_Main_Settings['no_up_img'] != 'on')
{
if(isset($my_post['post_gallery']) && !empty($my_post['post_gallery']))
{
if (($key = array_search($my_post['crawlomatic_post_image'], $my_post['post_gallery'])) !== false) {
unset($my_post['post_gallery'][$key]);
$my_post['post_gallery'] = array_values($my_post['post_gallery']);
}
$xcounter = 1;
$attach_ids = array();
for($cntj = 0;$cntj < count($my_post['post_gallery']); $cntj++)
{
$my_post['post_gallery'][$cntj] = htmlspecialchars_decode($my_post['post_gallery'][$cntj]);
if ($gallery_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $gallery_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_gallery_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_cont_gallery = preg_replace("~" . $sbr . "~i", $repreg, $my_post['post_gallery'][$cntj]);
if($temp_cont_gallery !== NULL)
{
$my_post['post_gallery'][$cntj] = $temp_cont_gallery;
}
}
}
}
$my_post['post_gallery'] = array_unique($my_post['post_gallery']);
if(count($my_post['post_gallery']) > 0)
{
$product_images = get_post_meta($my_post['ID'], '_product_image_gallery', true);
if($product_images){
$product_image_gallery = explode(',', $product_images);
foreach ($product_image_gallery as $image_id) {
wp_delete_attachment($image_id, true);
}
}
}
foreach($my_post['post_gallery'] as $gimg)
{
$gimg = trim($gimg);
if(empty($gimg))
{
continue;
}
$uploaded_gallery = crawlomatic_upload_attachment_media($gimg, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass, $xcounter);
if($uploaded_gallery === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('crawlomatic_upload_attachment_media failed for ' . $gimg . '!');
}
}
else
{
$attach_ids[] = $uploaded_gallery;
}
$xcounter++;
}
if($post_type == 'product' && !empty($attach_ids))
{
update_post_meta($post_id, '_product_image_gallery', implode(',', $attach_ids));
}
}
}
if (!isset($crawlomatic_Main_Settings['no_up_img']) || $crawlomatic_Main_Settings['no_up_img'] != 'on')
{
if ($featured_image == '1') {
$get_img = $my_post['crawlomatic_post_image'];
if ($get_img != '') {
if (!crawlomatic_generate_featured_image($get_img, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed for ' . $get_img . '!');
}
} else {
$featured_path = $get_img;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) )
{
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
} else {
$image_failed = true;
}
}
if ($image_failed || $featured_image !== '1') {
if ($image_url != '')
{
$replacement = str_replace(array('[', ']'), '', $my_post['post_title']);
$image_url_temp = str_replace('%%item_title%%', $replacement, $image_url);
$image_url_temp = preg_replace_callback('#%%random_image\[([^\]]*?)\]%%#', function ($matches) {
$my_img = crawlomatic_get_random_image_google($matches[1]);
return $my_img;
}, $image_url_temp);
$image_urlx = explode(',', $image_url_temp);
$image_urlx = trim($image_urlx[array_rand($image_urlx)]);
$retim = false;
if(is_numeric($image_urlx) && $image_urlx > 0)
{
require_once(ABSPATH . 'wp-admin/includes/image.php');
require_once(ABSPATH . 'wp-admin/includes/media.php');
$res2 = set_post_thumbnail($post_id, $image_urlx);
if ($res2 === FALSE) {
}
else
{
$retim = true;
}
}
if($retim == false && $image_urlx != '')
{
if (isset($crawlomatic_Main_Settings['crawlomatic_featured_image_checking']) && $crawlomatic_Main_Settings['crawlomatic_featured_image_checking'] == 'on') {
stream_context_set_default( [
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
],
]);
error_reporting(0);
$url_headers = get_headers($image_urlx, 1);
error_reporting(E_ALL);
if (isset($url_headers['Content-Type'])) {
if (is_array($url_headers['Content-Type'])) {
$img_type = strtolower($url_headers['Content-Type'][0]);
} else {
$img_type = strtolower($url_headers['Content-Type']);
}
if (strstr($img_type, 'image/') !== false) {
if (!crawlomatic_generate_featured_image($image_urlx, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed to default value: ' . $image_urlx . '!');
}
} else {
$featured_path = $image_urlx;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) ) {
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
}
}
}
else
{
if (!crawlomatic_generate_featured_image($image_urlx, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed to default value: ' . $image_urlx . '!');
}
} else {
$featured_path = $image_urlx;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) ) {
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
}
}
}
}
}
if ($can_create_tag == '1') {
if(strstr($custom_tax, 'product_tag') === false)
{
if ($my_post['tags_input'] != '')
{
if($post_type == 'product')
{
wp_set_post_terms($post_id, $my_post['tags_input'], 'product_tag', false);
}
}
}
}
if ($auto_categories == '1') {
if(strstr($custom_tax, 'product_cat') === false)
{
if ($my_post['extra_categories'] != '') {
if($post_type == 'product')
{
if($parent_category_id != '')
{
$termid = crawlomatic_create_terms('product_cat', $parent_category_id, $my_post['extra_categories'], $remove_cats);
}
else
{
$termid = crawlomatic_create_terms('product_cat', null, $my_post['extra_categories'], $remove_cats);
}
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($termid as $tx)
{
pll_set_term_language($tx, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', 'product_cat' );
foreach($termid as $tx)
{
$pars['element_id'] = $tx;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
}
}
else
{
if($parent_category_id != '')
{
$termid = crawlomatic_create_terms('category', $parent_category_id, $my_post['extra_categories'], $remove_cats);
}
else
{
$termid = crawlomatic_create_terms('category', null, $my_post['extra_categories'], $remove_cats);
}
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($termid as $tx)
{
pll_set_term_language($tx, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', 'category' );
foreach($termid as $tx)
{
$pars['element_id'] = $tx;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
}
}
if($post_type == 'product')
{
wp_set_post_terms($post_id, $termid, 'product_cat', true);
}
else
{
wp_set_post_terms($post_id, $termid, 'category', true);
}
}
}
}
if (isset($default_category) && $default_category !== 'crawlomatic_no_category_12345678' && $default_category[0] !== 'crawlomatic_no_category_12345678') {
if(is_array($default_category))
{
$cats = array();
$wcats = array();
foreach($default_category as $dc)
{
if(substr($dc, 0, 1) === 'w')
{
$wcats[] = ltrim($dc, 'w');
}
else
{
$cats[] = $dc;
}
}
if($post_type == 'product')
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_terms($post_id, $wcats, 'product_cat', true);
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
else
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_categories($post_id, $cats, true);
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($cats as $cc)
{
pll_set_term_language($cc, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
}
else
{
$cats = array();
$wcats = array();
if(substr($default_category, 0, 1) === 'w')
{
$wcats[] = ltrim($default_category, 'w');
}
else
{
$cats[] = $default_category;
}
if($post_type == 'product')
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_terms($post_id, $wcats, 'product_cat', true);
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
else
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_categories($post_id, $cats, true);
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($cats as $cc)
{
pll_set_term_language($cc, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
}
}
if (isset($crawlomatic_Main_Settings['post_source_custom']) && $crawlomatic_Main_Settings['post_source_custom'] != '') {
$tax_rez = wp_set_object_terms( $post_id, $crawlomatic_Main_Settings['post_source_custom'], 'coderevolution_post_source', false);
}
else
{
$tax_rez = wp_set_object_terms( $post_id, 'Crawlomatic_' . $param, 'coderevolution_post_source', false);
}
if (is_wp_error($tax_rez)) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('wp_set_object_terms failed for: ' . $post_id . '!');
}
}
if($post_type == 'topic' && $parent_id != '')
{
update_post_meta($post_id, '_bbp_forum_id', $parent_id);
update_post_meta($post_id, '_bbp_topic_id', $post_id);
update_post_meta($post_id, '_bbp_voice_count', '0');
update_post_meta($post_id, '_bbp_reply_count', '0');
update_post_meta($post_id, '_bbp_reply_count_hidden', '0');
update_post_meta($post_id, '_bbp_last_reply_id', '0');
update_post_meta($post_id, '_bbp_last_active_id', $post_id);
update_post_meta($post_id, '_bbp_last_active_time', get_post_field( 'post_date', $post_id, 'db' ));
do_action( 'bbp_insert_topic', (int) $post_id, (int) $parent_id );
}
if($post_type == 'reply' && $parent_id != '')
{
if(function_exists('bbp_get_topic_forum_id'))
{
$forum_aidi = bbp_get_topic_forum_id($parent_id);
if(empty($forum_aidi))
{
$forum_aidi = 0;
}
}
else
{
$forum_aidi = 0;
}
do_action( 'bbp_insert_reply', (int) $post_id, (int) $parent_id, (int) $forum_aidi );
}
if (isset($crawlomatic_Main_Settings['link_source']) && $crawlomatic_Main_Settings['link_source'] == 'on') {
$title_link_url = '1';
}
else
{
$title_link_url = '0';
}
if($featured_path == '')
{
$featured_path = $my_post['crawlomatic_post_image'];
}
if(!empty($my_post['attach_ids']))
{
foreach($my_post['attach_ids'] as $zatt_id)
{
crawlomatic_add_attachment_to_post($post_id, $zatt_id);
}
}
crawlomatic_addPostMeta($post_id, $my_post, $param, $featured_path, $title_link_url, $css_cont, $rule_unique_id, $crawlomatic_Main_Settings);
if($wpml_lang != '' && (class_exists('SitePress') || function_exists('wpml_object_id')))
{
$wpml_element_type = apply_filters( 'wpml_element_type', $post_type );
$pars['element_id'] = $post_id;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if($wp_filesystem->exists(WP_PLUGIN_DIR . '/sitepress-multilingual-cms/inc/wpml-api.php'))
{
include_once( WP_PLUGIN_DIR . '/sitepress-multilingual-cms/inc/wpml-api.php' );
}
$wpml_lang = trim($wpml_lang);
if(function_exists('wpml_update_translatable_content'))
{
wpml_update_translatable_content('post_' . $post_type, $post_id, $wpml_lang);
if($my_post['crawlomatic_post_orig_url'] != '')
{
global $sitepress;
global $wpdb;
$keyid = md5($my_post['crawlomatic_post_orig_url']);
$keyName = $keyid . '_wpml';
$rezxxxa = $wpdb->get_results( "SELECT * FROM {$wpdb->prefix}postmeta WHERE `meta_key` = '$keyName' limit 1", ARRAY_A );
if(count($rezxxxa) != 0)
{
$metaRow = $rezxxxa[0];
$metaValue = $metaRow['meta_value'];
$metaParts = explode('_', $metaValue);
$sitepress->set_element_language_details($post_id, 'post_'.$my_post['post_type'] , $metaParts[0], $wpml_lang, $metaParts[1] );
}
else
{
$ptrid = $sitepress->get_element_trid($post_id);
update_post_meta($post_id, $keyid.'_wpml', $ptrid.'_'.$wpml_lang );
}
}
}
}
if(isset($items[$iloop]['variant_parent']))
{
if(class_exists('WC_Product_Variable'))
{
WC_Product_Variable::sync( $items[$iloop]['variant_parent'] );
}
}
} else {
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Failed to update post to the database! Title:' . $my_post['post_title'] . '! Error: ' . $post_id->get_error_message() . 'Error code: ' . $post_id->get_error_code() . 'Error data: ' . $post_id->get_error_data());
}
continue;
}
}
}
}
}
}
wp_suspend_cache_addition(false);
}
}
catch (Exception $e) {
crawlomatic_log_to_file ('[AutoUpdater] Exception thrown at auto update: ' . esc_html($e->getMessage()) . '!');
return;
}
if(isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file ('[AutoUpdater] Auto update process successful! Updated posts: ' . $updated);
}
}
/**
 * Synchronises the plugin's WP-Cron events with the saved settings.
 *
 * Plugin enabled ('crawlomatic_enabled' === 'on'):
 *  - 'crawlomaticaction' is scheduled when missing ('minutely' when the
 *    'crawlomatic_minute_running_unlocked' option equals '1', otherwise 'hourly');
 *  - 'crawlomaticactionclear' follows the 'auto_clear_logs' recurrence while logging is on;
 *  - 'crawlomaticactionupdate' follows the 'auto_update_posts' recurrence.
 * Plugin disabled: all three events are removed.
 *
 * @return void
 */
function crawlomatic_cron_schedule()
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $plugin_enabled = isset($crawlomatic_Main_Settings['crawlomatic_enabled'])
        && $crawlomatic_Main_Settings['crawlomatic_enabled'] === 'on';

    if (!$plugin_enabled) {
        if (wp_next_scheduled('crawlomaticaction')) {
            wp_clear_scheduled_hook('crawlomaticaction');
        }
        crawlomatic_cron_drop_recurring_hook('crawlomaticactionclear', 'crawlomatic_schedule_time');
        crawlomatic_cron_drop_recurring_hook('crawlomaticactionupdate', 'crawlomatic_schedule_update_time');
        return;
    }

    // Main rule runner.
    if (!wp_next_scheduled('crawlomaticaction')) {
        $unlocker = get_option('crawlomatic_minute_running_unlocked', false);
        $recurrence = ($unlocker == '1') ? 'minutely' : 'hourly';
        $rez = wp_schedule_event(time(), $recurrence, 'crawlomaticaction');
        if ($rez === FALSE) {
            crawlomatic_log_to_file('[Scheduler] Failed to schedule crawlomaticaction to crawlomatic_cron!');
        }
    }

    // Automatic log clearing: only while logging itself is switched on.
    $clear_logs = isset($crawlomatic_Main_Settings['enable_logging'])
        && $crawlomatic_Main_Settings['enable_logging'] === 'on'
        && isset($crawlomatic_Main_Settings['auto_clear_logs'])
        && $crawlomatic_Main_Settings['auto_clear_logs'] !== 'No';
    if ($clear_logs) {
        crawlomatic_cron_sync_recurring_hook('crawlomaticactionclear', $crawlomatic_Main_Settings['auto_clear_logs'], 'crawlomatic_schedule_time');
    } else {
        crawlomatic_cron_drop_recurring_hook('crawlomaticactionclear', 'crawlomatic_schedule_time');
    }

    // Automatic post updating.
    $update_posts = isset($crawlomatic_Main_Settings['auto_update_posts'])
        && $crawlomatic_Main_Settings['auto_update_posts'] !== 'No';
    if ($update_posts) {
        crawlomatic_cron_sync_recurring_hook('crawlomaticactionupdate', $crawlomatic_Main_Settings['auto_update_posts'], 'crawlomatic_schedule_update_time');
    } else {
        crawlomatic_cron_drop_recurring_hook('crawlomaticactionupdate', 'crawlomatic_schedule_update_time');
    }
}

/**
 * Ensures $hook is scheduled with $recurrence and records that recurrence in $option_name.
 *
 * Nothing happens when the hook is already scheduled and the recorded recurrence matches.
 * Otherwise any existing schedule is cleared and the hook is scheduled again.
 *
 * @param string $hook        Cron hook name.
 * @param string $recurrence  Recurrence name handed to wp_schedule_event().
 * @param string $option_name Option that remembers the recurrence currently in use.
 * @return void
 */
function crawlomatic_cron_sync_recurring_hook($hook, $recurrence, $option_name)
{
    if (wp_next_scheduled($hook)) {
        $recorded = get_option($option_name);
        if ($recorded && $recorded == $recurrence) {
            return;
        }
        wp_clear_scheduled_hook($hook);
    }
    $rez = wp_schedule_event(time(), $recurrence, $hook);
    // update_option() also overwrites a stale value; add_option() silently refuses to,
    // which left an outdated recurrence recorded and triggered a second reschedule later.
    update_option($option_name, $recurrence);
    if ($rez === FALSE) {
        crawlomatic_log_to_file('[Scheduler] Failed to schedule ' . $hook . ' to ' . $recurrence . '!');
    }
}

/**
 * Removes every scheduled occurrence of $hook and forgets its recorded recurrence.
 *
 * @param string $hook        Cron hook name.
 * @param string $option_name Option that remembers the recurrence currently in use.
 * @return void
 */
function crawlomatic_cron_drop_recurring_hook($hook, $option_name)
{
    if (wp_next_scheduled($hook)) {
        wp_clear_scheduled_hook($hook);
    }
    delete_option($option_name);
}
/**
 * Cron entry point: purges expired posts, prunes the running list and runs every due rule.
 *
 * Steps (only when 'crawlomatic_enabled' === 'on'):
 *  1. If 'auto_delete_enabled' is on, permanently delete posts (and their child
 *     attachments) whose 'crawlomatic_delete_time' meta lies in the past.
 *  2. Drop entries older than one hour from the 'crawlomatic_running_list' option.
 *  3. If both 'run_after' and 'run_before' are set, stop unless the current time
 *     (as reported by date()) is strictly inside that window.
 *  4. Run each active rule whose schedule has elapsed, sleeping 'rule_delay' seconds
 *     between consecutive runs, then reset 'crawlomatic_running_list'.
 *
 * @return void
 */
function crawlomatic_cron()
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['crawlomatic_enabled']) || $crawlomatic_Main_Settings['crawlomatic_enabled'] !== 'on') {
        return;
    }

    if (isset($crawlomatic_Main_Settings['auto_delete_enabled']) && $crawlomatic_Main_Settings['auto_delete_enabled'] === 'on') {
        $post_stati = array_diff(
            get_post_stati(),
            array('auto-draft', 'inherit', 'request-pending', 'request-confirmed', 'request-failed', 'request-completed')
        );
        $postsPerPage = 50000;
        $postOffset = 0;
        do {
            $post_list = get_posts(array(
                'post_status' => $post_stati,
                'post_type' => array(
                    'any'
                ),
                'numberposts' => $postsPerPage,
                'fields' => 'ids',
                'meta_key' => 'crawlomatic_delete_time',
                'offset' => $postOffset
            ));
            $left_in_place = 0;
            wp_suspend_cache_addition(true);
            foreach ($post_list as $p) {
                $exp_time = get_post_meta($p, 'crawlomatic_delete_time', true);
                $expired = $exp_time != '' && $exp_time !== false && time() > $exp_time;
                if (!$expired) {
                    $left_in_place++;
                    continue;
                }
                $post_attachments = get_children(array(
                    'post_parent' => $p
                ));
                if (!empty($post_attachments)) {
                    foreach ($post_attachments as $attachment) {
                        wp_delete_attachment($attachment->ID, true);
                    }
                }
                $res = wp_delete_post($p, true);
                if ($res === false) {
                    crawlomatic_log_to_file('[Scheduler] Failed to automatically delete post ' . $p . ', exptime: ' . $exp_time . ', time: ' . time() . '!');
                }
                if (get_post($p) !== null) {
                    $left_in_place++;
                }
            }
            wp_suspend_cache_addition(false);
            // Deleted rows shift the remaining result set forward, so the offset may only
            // advance by the posts that are still in the database. Advancing by a whole
            // page (as before) skipped one post for every post deleted on earlier pages.
            $postOffset += $left_in_place;
        } while (!empty($post_list));
        unset($post_list);
    }

    // Drop the cached copy before reading, presumably so a value written by another request is seen.
    $GLOBALS['wp_object_cache']->delete('crawlomatic_running_list', 'options');
    $running = get_option('crawlomatic_running_list');
    $curr_time = time();
    $update = false;
    if (is_array($running)) {
        foreach ($running as $key => $value) {
            // Keys are compared against time(); non-numeric keys are left alone because
            // subtracting them raises a TypeError on PHP 8.
            if (is_numeric($key) && ($curr_time - $key > 3600) && $key > 1000) {
                unset($running[$key]);
                $update = true;
            }
        }
    }
    if ($update === true) {
        update_option('crawlomatic_running_list', $running);
    }

    if (isset($crawlomatic_Main_Settings['run_after']) && $crawlomatic_Main_Settings['run_after'] != '' && isset($crawlomatic_Main_Settings['run_before']) && $crawlomatic_Main_Settings['run_before'] != '') {
        $date1 = DateTime::createFromFormat('H:i', date("H:i"));
        $date2 = DateTime::createFromFormat('H:i', $crawlomatic_Main_Settings['run_after']);
        $date3 = DateTime::createFromFormat('H:i', $crawlomatic_Main_Settings['run_before']);
        if ($date2 instanceof DateTime && $date3 instanceof DateTime && $date2 > $date3) {
            // Window wraps around midnight (e.g. 22:00 -> 06:00); a plain
            // "after AND before" test can never be true for such a window.
            $inside_window = ($date1 > $date2 || $date1 < $date3);
        } else {
            $inside_window = ($date1 > $date2 && $date1 < $date3);
        }
        if (!$inside_window) {
            return;
        }
    }

    $GLOBALS['wp_object_cache']->delete('crawlomatic_rules_list', 'options');
    $rules = get_option('crawlomatic_rules_list');
    if (!$rules) {
        $rules = array();
    }
    $rule_run = false;
    $unlocker = get_option('crawlomatic_minute_running_unlocked', false);
    if (!empty($rules) && is_array($rules)) {
        $cont = 0;
        foreach ($rules as $myValues) {
            $array_my_values = is_array($myValues) ? array_values($myValues) : array();
            foreach ($array_my_values as $position => $field) {
                if (is_string($field)) {
                    $array_my_values[$position] = stripslashes($field);
                }
            }
            $schedule = isset($array_my_values[1]) ? $array_my_values[1] : '24';
            $active = isset($array_my_values[2]) ? $array_my_values[2] : '0';
            $last_run = isset($array_my_values[3]) ? $array_my_values[3] : crawlomatic_get_date_now();
            if ($active == '1') {
                $now = crawlomatic_get_date_now();
                if ($unlocker == '1') {
                    $nextrun = crawlomatic_add_minute($last_run, $schedule);
                    $crawlomatic_hour_diff = (int) crawlomatic_minute_diff($now, $nextrun);
                } else {
                    $nextrun = crawlomatic_add_hour($last_run, $schedule);
                    $crawlomatic_hour_diff = (int) crawlomatic_hour_diff($now, $nextrun);
                }
                if ($crawlomatic_hour_diff >= 0) {
                    if ($rule_run === false) {
                        $rule_run = true;
                    } elseif (isset($crawlomatic_Main_Settings['rule_delay']) && $crawlomatic_Main_Settings['rule_delay'] !== '') {
                        // sleep() needs a non-negative integer; the setting is stored as text.
                        sleep(max(0, (int) $crawlomatic_Main_Settings['rule_delay']));
                    }
                    // Rules are addressed by their position in the stored list.
                    crawlomatic_run_rule($cont);
                }
            }
            $cont = $cont + 1;
        }
        $running = array();
        update_option('crawlomatic_running_list', $running);
    }
}
/**
 * Prints a canonical <link> tag that points at the stored source URL of the current post.
 *
 * Output only happens when the plugin and the 'add_canonical' setting are both 'on',
 * a single post is being displayed and the post has a non-empty 'crawlomatic_post_url'
 * meta value. In that case the 'wpseo_canonical' filter is also forced to false.
 *
 * @return void
 */
function crawlomatic_add_canonical()
{
    global $post;
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);

    $plugin_on = isset($crawlomatic_Main_Settings['crawlomatic_enabled'])
        && $crawlomatic_Main_Settings['crawlomatic_enabled'] == 'on';
    $canonical_on = isset($crawlomatic_Main_Settings['add_canonical'])
        && $crawlomatic_Main_Settings['add_canonical'] == 'on';
    if (!$plugin_on || !$canonical_on || !is_single()) {
        return;
    }

    $original_url = get_post_meta($post->ID, 'crawlomatic_post_url', true);
    if ($original_url === false || $original_url == '') {
        return;
    }

    add_filter( 'wpseo_canonical', '__return_false' );
    echo '<link rel="canonical" href="' . esc_url($original_url) . '" />';
}
/**
 * Appends a timestamped line to WP_CONTENT_DIR/crawlomatic_info.log.
 *
 * Nothing is written unless the 'enable_logging' setting is 'on'. The timestamp is
 * rendered in the timezone returned by crawlomatic_get_blog_timezone() when that
 * call does not return false.
 *
 * @param string $str Message to log.
 * @return void
 */
function crawlomatic_log_to_file($str)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['enable_logging']) || $crawlomatic_Main_Settings['enable_logging'] != 'on') {
        return;
    }
    $tz = crawlomatic_get_blog_timezone();
    // Remember the active default timezone so it can be put back afterwards;
    // previously it was forced to 'UTC' regardless of what the caller had set.
    $previous_tz = date_default_timezone_get();
    if ($tz !== false) {
        date_default_timezone_set($tz->getName());
    }
    $d = date("j-M-Y H:i:s e", time());
    error_log("[$d] " . $str . "<br/>\n", 3, WP_CONTENT_DIR . '/crawlomatic_info.log');
    if ($tz !== false) {
        date_default_timezone_set($previous_tz);
    }
}
/**
 * Permanently deletes every post that carries a non-empty 'crawlomatic_parent_rule'
 * meta value, together with its child attachments.
 *
 * The outcome is written to the log only when 'enable_detailed_logging' is set.
 *
 * @return void
 */
function crawlomatic_delete_all_posts()
{
    $failed = false;
    $number = 0;
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $post_stati = array_diff(
        get_post_stati(),
        array('auto-draft', 'inherit', 'request-pending', 'request-confirmed', 'request-failed', 'request-completed')
    );
    $postsPerPage = 50000;
    $postOffset = 0;
    do {
        $post_list = get_posts(array(
            'post_status' => $post_stati,
            'post_type' => array(
                'any'
            ),
            'numberposts' => $postsPerPage,
            'fields' => 'ids',
            'meta_key' => 'crawlomatic_parent_rule',
            'offset' => $postOffset
        ));
        $left_in_place = 0;
        wp_suspend_cache_addition(true);
        foreach ($post_list as $post) {
            $index = get_post_meta($post, 'crawlomatic_parent_rule', true);
            if (!isset($index) || $index === '') {
                $left_in_place++;
                continue;
            }
            $post_attachments = get_children(array(
                'post_parent' => $post
            ));
            if (!empty($post_attachments)) {
                foreach ($post_attachments as $attachment) {
                    wp_delete_attachment($attachment->ID, true);
                }
            }
            $res = wp_delete_post($post, true);
            if ($res === false) {
                $failed = true;
            } else {
                $number++;
            }
            if (get_post($post) !== null) {
                $left_in_place++;
            }
        }
        wp_suspend_cache_addition(false);
        // Every deleted row shortens the result set, so the next page starts right after
        // the posts that are still present. Advancing by a full page (as before) left
        // posts behind as soon as more than one page of matches existed.
        $postOffset += $left_in_place;
    } while (!empty($post_list));
    unset($post_list);

    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        if ($failed === true) {
            crawlomatic_log_to_file('[PostDelete] Failed to delete all posts!');
        } else {
            crawlomatic_log_to_file('[PostDelete] Successfuly deleted ' . esc_html($number) . ' posts!');
        }
    }
}
/**
 * Replaces the stored 'crawlomatic_rules_list' option with an empty list.
 *
 * @return void
 */
function crawlomatic_delete_all_rules()
{
    $no_rules = array();
    update_option('crawlomatic_rules_list', $no_rules);
}
/**
 * Expands every placeholder of a post content template with the data of one scraped item.
 *
 * Processing order: %regex(...)% tags, %regextext(...)% tags, spintax, random pick of
 * one '<!- template ->' separated variant, then the %%...%% placeholders, the
 * %get_final_url(...)% tags, the %%random_image...%% tags and finally
 * crawlomatic_replaceSynergyShortcodes().
 *
 * @param string       $the_content           Template to expand.
 * @param string       $just_title            Replaces %%item_title%%; also passed to the image helpers.
 * @param string       $content               Replaces %%item_content%% and feeds %%item_content_plain_text%%.
 * @param string       $item_url              Source URL for %%item_url%%, the read-more button and screenshots.
 * @param string       $item_cat              Replaces %%item_cat%%.
 * @param string       $item_tags             Replaces %%item_tags%%.
 * @param string       $item_image            Replaces %%item_image_URL%% and feeds %%item_show_image%%.
 * @param string       $description           Replaces %%item_description%% and %%item_excerpt%%.
 * @param string       $read_more             Second argument of crawlomatic_getReadMoreButton().
 * @param string       $date                  Replaces %%item_pub_date%%.
 * @param string|false $item_price            Replaces %%item_original_price%%; false blanks both price placeholders.
 * @param string       $item_price_multi      Replaces %%item_price%%.
 * @param array        $custom_shortcodes_arr Map of name => value; each replaces %%name%%.
 * @param string       $img_attr              Replaces %%royalty_free_image_attribution%% after its own %%image_source_*%% placeholders are removed.
 * @param string       $screenimageURL        Screenshot URL; when empty an s.wordpress.com mshots URL is built from $item_url.
 * @param string       $append_urls           Text appended to $item_url for %%item_url%% and the read-more button.
 * @param array        $item_download         Values for %%downloaded_file%% (index 0) and %%downloaded_file[N]%%.
 * @param array|mixed  $image_gallery         Image URLs for %%item_gallery%%; ignored unless it is an array.
 * @param string       $gallery_regex         Newline separated patterns applied to each gallery URL.
 * @param string       $replace_gallery_regex Newline separated replacements matching $gallery_regex line by line.
 * @param string|false $regular_price         Replaces %%item_original_regular_price%%; false blanks both regular price placeholders.
 * @param string       $regular_price_multi   Replaces %%item_regular_price%%.
 * @return string Expanded content.
 */
function crawlomatic_replaceContentShortcodes($the_content, $just_title, $content, $item_url, $item_cat, $item_tags, $item_image, $description, $read_more, $date, $item_price, $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $image_gallery, $gallery_regex = '', $replace_gallery_regex = '', $regular_price = '', $regular_price_multi = '')
{
    // The first argument of a regex tag may itself contain placeholders; it is expanded
    // with the same item data before the user pattern is applied to it.
    $expand_nested = function ($text) use ($just_title, $content, $item_url, $item_cat, $item_tags, $item_image, $description, $read_more, $date, $item_price, $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $image_gallery, $gallery_regex, $replace_gallery_regex, $regular_price, $regular_price_multi) {
        return crawlomatic_replaceContentShortcodes($text, $just_title, $content, $item_url, $item_cat, $item_tags, $item_image, $description, $read_more, $date, $item_price, $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $image_gallery, $gallery_regex, $replace_gallery_regex, $regular_price, $regular_price_multi);
    };

    // Handles one family of tags: tag("text", "pattern"[, "group"[, "delimiter"[, "index"]]]).
    // With $plain_text the text is first reduced to plain text with newlines.
    $apply_regex_tags = function ($text, $tag_pattern, $plain_text) use ($expand_nested) {
        $tags = array();
        preg_match_all($tag_pattern, $text, $tags);
        if (!is_array($tags) || !count($tags) || !is_array($tags[0])) {
            return $text;
        }
        $tag_count = count($tags[0]);
        for ($i = 0; $i < $tag_count; $i++) {
            $fullmatch = $tags[0][$i];
            $search_in = $expand_nested($tags[1][$i]);
            $matchpattern = $tags[2][$i];
            $element = isset($tags[3][$i]) ? $tags[3][$i] : '';
            $delimeter = isset($tags[4][$i]) ? $tags[4][$i] : '';
            $counter = isset($tags[5][$i]) ? $tags[5][$i] : '';

            if ($plain_text) {
                $search_in = strip_tags($search_in, '<p><br>');
                $reduced = preg_replace("/<p[^>]*?>/", "", $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
                $search_in = str_replace("</p>", "<br />", $search_in);
                $reduced = preg_replace('/\<br(\s*)?\/?\>/i', "\n", $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
                // Strip leading line breaks of any flavour.
                $reduced = preg_replace('/^(?:\r\n|\n|\r)+/', '', $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
            }

            // Reset per tag: when a user pattern fails to compile, preg_match_all() leaves
            // the variable untouched and the previous tag's matches must not be reused.
            $submatches = null;
            if (preg_match('<^[\/#%+~[\]{}][\s\S]*[\/#%+~[\]{}]$>', $matchpattern, $z)) {
                // Pattern already carries its own delimiters.
                preg_match_all($matchpattern, $search_in, $submatches, PREG_PATTERN_ORDER);
            } else {
                preg_match_all('~'.$matchpattern.'~si', $search_in, $submatches, PREG_PATTERN_ORDER);
            }
            if (!is_array($submatches) || !isset($submatches[0]) || !is_array($submatches[0])) {
                continue;
            }

            $empty_elements = array_keys($submatches[0], "");
            foreach ($empty_elements as $e) {
                unset($submatches[0][$e]);
            }
            $submatches[0] = array_unique($submatches[0]);
            if (!is_numeric($element)) {
                $element = 0;
            }
            if (!is_numeric($counter)) {
                $counter = 0;
            }
            $matched = isset($submatches[(int)($element)]) ? $submatches[(int)($element)] : '';
            $matched = array_unique((array)$matched);

            $replacement = '';
            if (empty($delimeter) || $delimeter == 'null') {
                // No delimiter: pick a single match. A missing index now yields an empty
                // string instead of the literal text "Array".
                if (isset($matched[$counter])) {
                    $replacement = $matched[$counter];
                }
            } else {
                $replacement = implode($delimeter, $matched);
            }
            if (empty($replacement)) {
                $replacement = '';
            }
            $text = str_replace($fullmatch, $replacement, $text);
        }
        return $text;
    };

    $the_content = $apply_regex_tags($the_content, '~%regex\(\s*\"([^"]+?)\s*"\s*[,;]\s*\"([^"]*)\"\s*(?:[,;]\s*\"([^"]*?)\s*\")?(?:[,;]\s*\"([^"]*?)\s*\")?(?:[,;]\s*\"([^"]*?)\s*\")?\)%~si', false);
    $the_content = $apply_regex_tags($the_content, '~%regextext\(\s*\"([^"]+?)\s*"\s*,\s*\"([^"]*)\"\s*(?:,\s*\"([^"]*?)\s*\")?(?:,\s*\"([^"]*?)\s*\")?(?:,\s*\"([^"]*?)\s*\")?\)%~si', true);

    $spintax = new Crawlomatic_Spintax();
    $the_content = $spintax->process($the_content);
    // Several template variants may be supplied; one is chosen at random.
    $pcxxx = explode('<!- template ->', $the_content);
    $the_content = $pcxxx[array_rand($pcxxx)];
    $the_content = str_replace('%%random_sentence%%', crawlomatic_random_sentence_generator(), $the_content);
    $the_content = str_replace('%%random_sentence2%%', crawlomatic_random_sentence_generator(false), $the_content);

    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    foreach (array('custom_html', 'custom_html2') as $html_key) {
        if (isset($crawlomatic_Main_Settings[$html_key])) {
            $spintax = new Crawlomatic_Spintax();
            $custom_block = $spintax->process(html_entity_decode($crawlomatic_Main_Settings[$html_key]));
            $the_content = str_replace('%%' . $html_key . '%%', $custom_block, $the_content);
        }
    }

    $the_content = str_replace('%%item_title%%', $just_title, $the_content);
    $the_content = str_replace('%%current_date%%', date('Y-m-d', time()), $the_content);
    $the_content = str_replace('%%current_time%%', date('H:i:s', time()), $the_content);
    $the_content = str_replace('%%item_content%%', $content, $the_content);
    $the_content = str_replace('%%item_url%%', $item_url . $append_urls, $the_content);
    $the_content = str_replace('%%item_cat%%', $item_cat, $the_content);
    $img_attr = str_replace(array('%%image_source_name%%', '%%image_source_url%%', '%%image_source_website%%'), '', $img_attr);
    $the_content = str_replace('%%royalty_free_image_attribution%%', $img_attr, $the_content);

    if ($item_price !== false) {
        $the_content = str_replace('%%item_original_price%%', $item_price, $the_content);
        $the_content = str_replace('%%item_price%%', $item_price_multi, $the_content);
    } else {
        $the_content = str_replace('%%item_original_price%%', '', $the_content);
        $the_content = str_replace('%%item_price%%', '', $the_content);
    }
    if ($regular_price !== false) {
        $the_content = str_replace('%%item_original_regular_price%%', $regular_price, $the_content);
        $the_content = str_replace('%%item_regular_price%%', $regular_price_multi, $the_content);
    } else {
        $the_content = str_replace('%%item_original_regular_price%%', '', $the_content);
        $the_content = str_replace('%%item_regular_price%%', '', $the_content);
    }

    $the_content = str_replace('%%item_tags%%', $item_tags, $the_content);
    $the_content = str_replace('%%item_content_plain_text%%', crawlomatic_getPlainContent($content), $the_content);
    $the_content = str_replace('%%item_read_more_button%%', crawlomatic_getReadMoreButton($item_url . $append_urls, $read_more), $the_content);
    $the_content = str_replace('%%item_show_image%%', crawlomatic_getItemImage($item_image, $just_title), $the_content);
    $the_content = str_replace('%%item_image_URL%%', $item_image, $the_content);
    $the_content = str_replace('%%item_description%%', $description, $the_content);
    $the_content = str_replace('%%item_excerpt%%', $description, $the_content);

    $item_all_images = '';
    if (is_array($image_gallery)) {
        // The pattern lists do not depend on the image, so they are split once.
        $strip_patterns = array();
        $strip_replacements = array();
        if ($gallery_regex !== '') {
            $strip_patterns = preg_split('/\r\n|\r|\n/', $gallery_regex);
            $strip_replacements = preg_split('/\r\n|\r|\n/', $replace_gallery_regex);
        }
        foreach ($image_gallery as $gallery_key => $gallery_url) {
            $gimg = htmlspecialchars_decode($gallery_url);
            foreach ($strip_patterns as $pattern_index => $sbr) {
                $repreg = isset($strip_replacements[$pattern_index]) ? $strip_replacements[$pattern_index] : '';
                $temp_cont_gallery = preg_replace("~" . $sbr . "~i", $repreg, $gimg);
                if ($temp_cont_gallery !== NULL) {
                    $gimg = $temp_cont_gallery;
                }
            }
            $image_gallery[$gallery_key] = $gimg;
        }
        $image_gallery = array_unique($image_gallery);
        foreach ($image_gallery as $img) {
            $item_all_images .= '<div><img src="' . esc_url($img) . '" alt="image" class="crf_img_set" onclick=\'window.open("' . esc_url($img) . '");\'/></div>';
        }
    }
    if ($item_all_images != '') {
        $item_all_images = '<div class="crawlomatic_gallery crawlomatic_cf">' . $item_all_images . '</div>';
    }
    $the_content = str_replace('%%item_gallery%%', $item_all_images, $the_content);
    $the_content = str_replace('%%item_pub_date%%', $date, $the_content);

    if (isset($item_download[0])) {
        $the_content = str_replace('%%downloaded_file%%', $item_download[0], $the_content);
    } else {
        $the_content = str_replace('%%downloaded_file%%', '', $the_content);
    }
    if (is_array($item_download)) {
        for ($j = 0; $j < count($item_download); $j++) {
            if (isset($item_download[$j])) {
                $the_content = str_replace('%%downloaded_file[' . $j . ']%%', $item_download[$j], $the_content);
            }
        }
    }
    // Blank any indexed download placeholder that had no matching file.
    $without_leftovers = preg_replace("#%%downloaded_file\[[\d]+?\]%%#", "", $the_content);
    if ($without_leftovers !== null) {
        $the_content = $without_leftovers;
    }

    if (is_array($custom_shortcodes_arr)) {
        foreach ($custom_shortcodes_arr as $index => $csa) {
            $the_content = str_replace('%%' . $index . '%%', $csa, $the_content);
        }
    }

    if ($screenimageURL != '') {
        $screenshot = esc_url($screenimageURL);
    } else {
        $snap = 'http://s.wordpress.com/mshots/v1/';
        if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '') {
            $h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
        } else {
            $h = '450';
        }
        if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '') {
            $w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
        } else {
            $w = '600';
        }
        $screenshot = esc_url($snap . urlencode($item_url) . '?w=' . $w . '&h=' . $h);
    }
    $the_content = str_replace('%%item_screenshot_url%%', $screenshot, $the_content);
    $the_content = str_replace('%%item_show_screenshot%%', crawlomatic_getItemImage($screenshot, $just_title), $the_content);

    // %get_final_url(URL)% is replaced by the URL reached after following redirects.
    $zmatches = array();
    preg_match_all('~%get_final_url\s*\(\s*([^\)]*?)\s*\)\s*%~si', $the_content, $zmatches);
    if (isset($zmatches[0]) && is_array($zmatches[0])) {
        for ($i = 0; $i < count($zmatches[0]); $i++) {
            $fullmatch = $zmatches[0][$i];
            $search_in = '';
            if (isset($zmatches[1][$i])) {
                $search_in = crawlomatic_findUltimateDestination2(trim($zmatches[1][$i]), $crawlomatic_Main_Settings, 25);
            }
            if ($fullmatch != '') {
                $the_content = str_replace($fullmatch, $search_in, $the_content);
            }
        }
    }

    $the_content = preg_replace_callback('#%%random_image_url\[([^\]]*?)\]%%#', function ($matches) {
        $my_img = crawlomatic_get_random_image_google($matches[1]);
        return $my_img;
    }, $the_content);
    $the_content = preg_replace_callback('#%%random_image\[([^\]]*?)\]%%#', function ($matches) {
        $my_img = crawlomatic_get_random_image_google($matches[1]);
        return '<img src="' . $my_img . '">';
    }, $the_content);
    $the_content = crawlomatic_replaceSynergyShortcodes($the_content);
    return $the_content;
}
/**
 * Follows HTTP redirects manually (header-only requests) and returns the final URL.
 *
 * A proxy from the comma separated 'proxy_url' setting is used when configured,
 * together with the credentials at the same position of 'proxy_auth'.
 *
 * @param string $url                       URL to resolve.
 * @param array  $crawlomatic_Main_Settings Plugin settings ('proxy_url' and 'proxy_auth' are read).
 * @param int    $maxRequests               Maximum number of requests to issue.
 * @return string The first URL that does not redirect any further, or '' when cURL
 *                cannot be initialised or the request budget is exhausted.
 */
function crawlomatic_findUltimateDestination2($url, $crawlomatic_Main_Settings, $maxRequests = 25)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_NOBODY, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 15);
    curl_setopt($ch, CURLOPT_USERAGENT, crawlomatic_get_random_user_agent());
    if (isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled') {
        $options = array();
        $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
        $randomness = array_rand($prx);
        $options[CURLOPT_PROXY] = trim($prx[$randomness]);
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
            $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
            if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '') {
                $options[CURLOPT_PROXYUSERPWD] = trim($prx_auth[$randomness]);
            }
        }
        curl_setopt_array($ch, $options);
    }

    $final_url = '';
    while ($maxRequests--) {
        curl_setopt($ch, CURLOPT_URL, $url);
        $response = curl_exec($ch);
        $location = '';
        if (is_string($response) && in_array(curl_getinfo($ch, CURLINFO_HTTP_CODE), [301, 302, 303, 307, 308])) {
            // Anchored to the start of a header line so "Content-Location:" is not mistaken for it.
            if (preg_match('/^Location:(.*)/mi', $response, $match)) {
                $location = trim($match[1]);
            }
        }
        if (empty($location)) {
            $final_url = $url;
            break;
        }
        if (strpos($location, '//') === 0) {
            // Protocol-relative redirect: keep the current scheme, take host and path from the header.
            $u = parse_url($url);
            $scheme = (is_array($u) && isset($u['scheme'])) ? $u['scheme'] : 'http';
            $url = $scheme . ':' . $location;
        } elseif ($location[0] == '/') {
            // Host-relative redirect: rebuild from the origin of the current URL.
            $u = parse_url($url);
            if (!is_array($u) || !isset($u['scheme'], $u['host'])) {
                // The current URL has no usable origin, so the redirect cannot be resolved.
                $final_url = $url;
                break;
            }
            $url = $u['scheme'] . '://' . $u['host'];
            if (isset($u['port'])) {
                $url .= ':' . $u['port'];
            }
            $url .= $location;
        } else {
            $url = $location;
        }
    }
    // Release the handle on every exit path, as crawlomatic_findUltimateDestination() does.
    curl_close($ch);
    return $final_url;
}
/**
 * Resolves a URL to its effective destination by letting cURL follow redirects.
 *
 * Only headers are requested. A proxy from the comma separated 'proxy_url' setting is
 * used when configured, with the credentials found at the same position in 'proxy_auth'.
 *
 * @param string $url                       URL to resolve.
 * @param array  $crawlomatic_Main_Settings Plugin settings ('proxy_url' and 'proxy_auth' are read).
 * @param int    $maxRequests               Maximum number of redirects to follow.
 * @return string Effective URL reported by cURL, or the input URL when it is empty,
 *                cURL cannot be initialised or the request fails.
 */
function crawlomatic_findUltimateDestination($url, $crawlomatic_Main_Settings, $maxRequests = 25)
{
    if (empty($url)) {
        return $url;
    }
    $handle = curl_init();
    if ($handle === false) {
        return $url;
    }

    curl_setopt($handle, CURLOPT_HEADER, true);
    curl_setopt($handle, CURLOPT_NOBODY, true);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_MAXREDIRS, $maxRequests);
    curl_setopt($handle, CURLOPT_TIMEOUT, 15);

    $use_proxy = isset($crawlomatic_Main_Settings['proxy_url'])
        && $crawlomatic_Main_Settings['proxy_url'] != ''
        && $crawlomatic_Main_Settings['proxy_url'] != 'disable'
        && $crawlomatic_Main_Settings['proxy_url'] != 'disabled';
    if ($use_proxy) {
        $proxies = explode(',', $crawlomatic_Main_Settings['proxy_url']);
        $pick = array_rand($proxies);
        $proxy_opts = array(CURLOPT_PROXY => trim($proxies[$pick]));
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
            // Credentials are matched to the proxy by list position.
            $credentials = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
            if (isset($credentials[$pick]) && trim($credentials[$pick]) != '') {
                $proxy_opts[CURLOPT_PROXYUSERPWD] = trim($credentials[$pick]);
            }
        }
        curl_setopt_array($handle, $proxy_opts);
    }

    curl_setopt($handle, CURLOPT_USERAGENT, crawlomatic_get_random_user_agent());
    curl_setopt($handle, CURLOPT_URL, $url);

    $outcome = curl_exec($handle);
    $resolved = ($outcome === false) ? $url : curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
    curl_close ($handle);
    return $resolved;
}
/**
 * Expands every placeholder of a post title template with the data of one scraped item.
 *
 * Processing order: %regex(...)% tags, %regextext(...)% tags, spintax, random pick of
 * one '<!- template ->' separated variant, the %%...%% placeholders and finally
 * crawlomatic_replaceSynergyShortcodes().
 *
 * @param string $the_content           Template to expand.
 * @param string $just_title            Replaces %%item_title%%.
 * @param string $content               Replaces %%item_description%%.
 * @param string $item_url              Replaces %%item_url%%.
 * @param string $item_cat              Replaces %%item_cat%%.
 * @param string $item_tags             Replaces %%item_tags%%.
 * @param array  $custom_shortcodes_arr Map of name => value; each replaces %%name%%.
 * @return string Expanded title.
 */
function crawlomatic_replaceTitleShortcodes($the_content, $just_title, $content, $item_url, $item_cat, $item_tags, $custom_shortcodes_arr)
{
    // The first argument of a regex tag may itself contain placeholders; it is expanded
    // with the same item data before the user pattern is applied to it.
    $expand_nested = function ($text) use ($just_title, $content, $item_url, $item_cat, $item_tags, $custom_shortcodes_arr) {
        return crawlomatic_replaceTitleShortcodes($text, $just_title, $content, $item_url, $item_cat, $item_tags, $custom_shortcodes_arr);
    };

    // Handles one family of tags: tag("text", "pattern"[, "group"[, "delimiter"[, "index"]]]).
    // With $plain_text the text is first reduced to plain text with newlines.
    $apply_regex_tags = function ($text, $tag_pattern, $plain_text) use ($expand_nested) {
        $tags = array();
        preg_match_all($tag_pattern, $text, $tags);
        if (!is_array($tags) || !count($tags) || !is_array($tags[0])) {
            return $text;
        }
        $tag_count = count($tags[0]);
        for ($i = 0; $i < $tag_count; $i++) {
            $fullmatch = $tags[0][$i];
            $search_in = $expand_nested($tags[1][$i]);
            $matchpattern = $tags[2][$i];
            $element = isset($tags[3][$i]) ? $tags[3][$i] : '';
            $delimeter = isset($tags[4][$i]) ? $tags[4][$i] : '';
            $counter = isset($tags[5][$i]) ? $tags[5][$i] : '';

            if ($plain_text) {
                $search_in = strip_tags($search_in, '<p><br>');
                $reduced = preg_replace("/<p[^>]*?>/", "", $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
                $search_in = str_replace("</p>", "<br />", $search_in);
                $reduced = preg_replace('/\<br(\s*)?\/?\>/i', "\n", $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
                // Strip leading line breaks of any flavour.
                $reduced = preg_replace('/^(?:\r\n|\n|\r)+/', '', $search_in);
                if ($reduced !== null) {
                    $search_in = $reduced;
                }
            }

            // Reset per tag: when a user pattern fails to compile, preg_match_all() leaves
            // the variable untouched and the previous tag's matches must not be reused.
            $submatches = null;
            if (preg_match('<^[\/#%+~[\]{}][\s\S]*[\/#%+~[\]{}]$>', $matchpattern, $z)) {
                // Pattern already carries its own delimiters.
                preg_match_all($matchpattern, $search_in, $submatches, PREG_PATTERN_ORDER);
            } else {
                preg_match_all('~'.$matchpattern.'~si', $search_in, $submatches, PREG_PATTERN_ORDER);
            }
            if (!is_array($submatches) || !isset($submatches[0]) || !is_array($submatches[0])) {
                continue;
            }

            $empty_elements = array_keys($submatches[0], "");
            foreach ($empty_elements as $e) {
                unset($submatches[0][$e]);
            }
            $submatches[0] = array_unique($submatches[0]);
            if (!is_numeric($element)) {
                $element = 0;
            }
            if (!is_numeric($counter)) {
                $counter = 0;
            }
            $matched = isset($submatches[(int)($element)]) ? $submatches[(int)($element)] : '';
            $matched = array_unique((array)$matched);

            $replacement = '';
            if (empty($delimeter) || $delimeter == 'null') {
                // No delimiter: pick a single match. A missing index now yields an empty
                // string instead of the literal text "Array".
                if (isset($matched[$counter])) {
                    $replacement = $matched[$counter];
                }
            } else {
                $replacement = implode($delimeter, $matched);
            }
            if (empty($replacement)) {
                $replacement = '';
            }
            $text = str_replace($fullmatch, $replacement, $text);
        }
        return $text;
    };

    $the_content = $apply_regex_tags($the_content, '~%regex\(\s*\"([^"]+?)\s*"\s*[,;]\s*\"([^"]*)\"\s*(?:[,;]\s*\"([^"]*?)\s*\")?(?:[,;]\s*\"([^"]*?)\s*\")?(?:[,;]\s*\"([^"]*?)\s*\")?\)%~si', false);
    $the_content = $apply_regex_tags($the_content, '~%regextext\(\s*\"([^"]+?)\s*"\s*,\s*\"([^"]*)\"\s*(?:,\s*\"([^"]*?)\s*\")?(?:,\s*\"([^"]*?)\s*\")?(?:,\s*\"([^"]*?)\s*\")?\)%~si', true);

    $spintax = new Crawlomatic_Spintax();
    $the_content = $spintax->process($the_content);
    // Several template variants may be supplied; one is chosen at random.
    $pcxxx = explode('<!- template ->', $the_content);
    $the_content = $pcxxx[array_rand($pcxxx)];

    $the_content = str_replace('%%current_date%%', date('Y-m-d', time()), $the_content);
    $the_content = str_replace('%%current_time%%', date('H:i:s', time()), $the_content);
    $the_content = str_replace('%%random_sentence%%', crawlomatic_random_sentence_generator(), $the_content);
    $the_content = str_replace('%%random_sentence2%%', crawlomatic_random_sentence_generator(false), $the_content);
    $the_content = str_replace('%%item_title%%', $just_title, $the_content);
    $the_content = str_replace('%%item_description%%', $content, $the_content);
    $the_content = str_replace('%%item_url%%', $item_url, $the_content);
    $the_content = str_replace('%%item_cat%%', $item_cat, $the_content);
    $the_content = str_replace('%%item_tags%%', $item_tags, $the_content);
    if (is_array($custom_shortcodes_arr)) {
        foreach ($custom_shortcodes_arr as $index => $csa) {
            $the_content = str_replace('%%' . $index . '%%', $csa, $the_content);
        }
    }
    $the_content = crawlomatic_replaceSynergyShortcodes($the_content);
    return $the_content;
}
// Run crawlomatic_add_canonical() on the 'wp_head' action.
add_action('wp_head', 'crawlomatic_add_canonical');
// Route the 'crawlomatic_my_action' admin-ajax action to crawlomatic_my_action_callback().
add_action('wp_ajax_crawlomatic_my_action', 'crawlomatic_my_action_callback');
function crawlomatic_my_action_callback()
{
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
$failed = false;
$del_id = $_POST['id'];
$how = $_POST['how'];
if($how == 'duplicate')
{
$GLOBALS['wp_object_cache']->delete('crawlomatic_rules_list', 'options');
if (!get_option('crawlomatic_rules_list')) {
$rules = array();
} else {
$rules = get_option('crawlomatic_rules_list');
}
if (!empty($rules)) {
$found = 0;
$cont = 0;
foreach ($rules as $request => $bundle[]) {
if ($cont == $del_id) {
$copy_bundle = $rules[$request];
$copy_bundle[33] = uniqid('', true);
$rules[] = $copy_bundle;
$found = 1;
break;
}
$cont = $cont + 1;
}
if($found == 0)
{
crawlomatic_log_to_file('crawlomatic_rules_list index not found: ' . $del_id);
echo 'nochange';
die();
}
else
{
update_option('crawlomatic_rules_list', $rules, false);
echo 'ok';
die();
}
} else {
crawlomatic_log_to_file('crawlomatic_rules_list empty!');
echo 'nochange';
die();
}
}
$force_delete = true;
$number = 0;
if ($how == 'trash') {
$force_delete = false;
}
$postsPerPage = 50000;
$paged = 0;
do
{
$postOffset = $paged * $postsPerPage;
$post_stati = get_post_stati();
foreach ($post_stati as $key => $val) {
if ($val == 'auto-draft') {
unset($post_stati[$key]);
}
if ($val == 'inherit') {
unset($post_stati[$key]);
}
if ($val == 'request-pending') {
unset($post_stati[$key]);
}
if ($val == 'request-confirmed') {
unset($post_stati[$key]);
}
if ($val == 'request-failed') {
unset($post_stati[$key]);
}
if ($val == 'request-completed') {
unset($post_stati[$key]);
}
}
$query = array(
'post_status' => $post_stati,
'post_type' => array(
'any'
),
'numberposts' => $postsPerPage,
'fields' => 'ids',
'meta_key' => 'crawlomatic_parent_rule',
'offset' => $postOffset
);
$post_list = get_posts($query);
$paged++;
wp_suspend_cache_addition(true);
foreach ($post_list as $post) {
$index = get_post_meta($post, 'crawlomatic_parent_rule', true);
if ($index == $del_id) {
$args = array(
'post_parent' => $post
);
$post_attachments = get_children($args);
if (isset($post_attachments) && !empty($post_attachments)) {
foreach ($post_attachments as $attachment) {
wp_delete_attachment($attachment->ID, true);
}
}
$res = wp_delete_post($post, $force_delete);
if ($res === false) {
$failed = true;
} else {
$number++;
}
}
}
wp_suspend_cache_addition(false);
}while(!empty($post_list));
unset($post_list);
if ($failed === true) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('[PostDelete] Failed to delete all posts for rule id: ' . esc_html($del_id) . '!');
}
echo 'failed';
} else {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('[PostDelete] Successfuly deleted ' . esc_html($number) . ' posts for rule id: ' . esc_html($del_id) . '!');
}
if ($number == 0) {
echo 'nochange';
} else {
echo 'ok';
}
}
die();
}
add_action( 'wp_ajax_crawlomatic_iframe', 'crawlomatic_iframe_callback' );
/**
 * Run the page getter selected by the Visual Selector's 'usephantom' value.
 *
 * Values '1'..'6' map to the PhantomJS, Puppeteer, Tor, PuppeteerAPI, TorAPI and
 * PhantomJSAPI getters defined elsewhere in this plugin.
 *
 * @return mixed Whatever the selected getter returned, or false when no getter is selected.
 */
function crawlomatic_iframe_fetch_rendered($use_phantom, $url, $cookie, $customUA, $htuser, $phantom_wait, $request_delay, $scripter, $local_storage, $auto_captcha, $enable_adblock, $clickelement)
{
    if ($use_phantom == '1') {
        return crawlomatic_get_page_PhantomJS($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage);
    }
    if ($use_phantom == '2') {
        return crawlomatic_get_page_Puppeteer($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage);
    }
    if ($use_phantom == '3') {
        return crawlomatic_get_page_Tor($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage);
    }
    if ($use_phantom == '4') {
        return crawlomatic_get_page_PuppeteerAPI($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage, $auto_captcha, $enable_adblock, $clickelement);
    }
    if ($use_phantom == '5') {
        return crawlomatic_get_page_TorAPI($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage, $auto_captcha, $enable_adblock, $clickelement);
    }
    if ($use_phantom == '6') {
        return crawlomatic_get_page_PhantomJSAPI($url, $cookie, $customUA, '1', $htuser, $phantom_wait, $request_delay, $scripter, $local_storage);
    }
    return false;
}
/**
 * Create a detached <a> element (text "link") whose href is the trimmed $href.
 */
function crawlomatic_iframe_make_anchor($dom, $href)
{
    $anchor = $dom->createElement('a', 'link');
    $anchor->setAttribute('href', trim($href));
    return $anchor;
}
/**
 * Flatten an XPath result into a list of <a> elements: matched anchors are kept
 * as they are, any other matched element contributes the anchors it contains.
 *
 * @param DOMNodeList|false $elements Result of DOMXPath::query().
 * @return array
 */
function crawlomatic_iframe_collect_anchors($elements)
{
    $found = array();
    if ($elements == false) {
        return $found;
    }
    foreach ($elements as $el) {
        if (isset($el->tagName) && $el->tagName === 'a') {
            $found[] = $el;
            continue;
        }
        foreach ($el->getElementsByTagName('a') as $inner) {
            $found[] = $inner;
        }
    }
    return $found;
}
/**
 * Tell how $candidate satisfies the seed expression.
 *
 * @return int 1 when the expression is empty or '*', 2 when it matches as a ready-made
 *             regex, 3 when it matches after being wrapped in '~' delimiters, 0 otherwise.
 */
function crawlomatic_iframe_match_kind($seed_expre, $candidate)
{
    $pattern = trim($seed_expre);
    if ($pattern == '*' || $pattern == '') {
        return 1;
    }
    if (preg_match($pattern, $candidate)) {
        return 2;
    }
    if (preg_match('~' . $pattern . '~', $candidate)) {
        return 3;
    }
    return 0;
}
/**
 * Run $seed_expre against $content with preg_match_all(), retrying with '~' delimiters
 * when the first attempt finds nothing or fails.
 *
 * @return array The $matches array filled by preg_match_all() (empty when both attempts fail).
 */
function crawlomatic_iframe_regex_matches($seed_expre, $content)
{
    $matches = array();
    $hits = preg_match_all($seed_expre, $content, $matches);
    if ($hits == false) {
        preg_match_all('~' . $seed_expre . '~', $content, $matches);
    }
    return $matches;
}
/**
 * Discover candidate links automatically.
 *
 * When the document has <a> elements they are filtered (substring pattern, optional
 * external-link check, optional maximum); otherwise absolute URLs are extracted from
 * the raw markup with a regex and wrapped into detached anchors.
 *
 * @param DOMDocument $dom              Parsed seed page.
 * @param string      $content          Raw seed page markup.
 * @param string      $url              Seed page URL, used to resolve and classify hrefs.
 * @param string      $seed_expre       Case-insensitive substring a link must contain ('' or '*' = any).
 * @param int         $max_links        Maximum number of links to keep, -1 for no limit.
 * @param bool        $only_internal    Drop links crawlomatic_isExternal() reports as external.
 * @param bool        $detailed_logging Log every link dropped by the pattern filter.
 * @return array List of DOMElement anchors in document order.
 */
function crawlomatic_iframe_auto_anchors($dom, $content, $url, $seed_expre, $max_links, $only_internal, $detailed_logging)
{
    $has_pattern = !empty($seed_expre) && $seed_expre != '*';
    $kept = array();
    $nodes = $dom->getElementsByTagName('a');
    if ($nodes->length == 0) {
        preg_match_all('!(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])!i', $content, $matchlinks);
        if (!isset($matchlinks[0][0])) {
            return $kept;
        }
        foreach ($matchlinks[0] as $mlks) {
            if ($has_pattern && stristr($mlks, $seed_expre) === false) {
                if ($detailed_logging) {
                    crawlomatic_log_to_file('Removing URL ' . $mlks . ' from results because it did not match pattern: ' . $seed_expre);
                }
                continue;
            }
            // Check the limit before appending so that exactly $max_links links are kept.
            if ($max_links != -1 && count($kept) >= $max_links) {
                break;
            }
            $kept[] = crawlomatic_iframe_make_anchor($dom, $mlks);
        }
        return $kept;
    }
    $reason = $only_internal ? ' from results because pattern not found: ' : ' from results because it did not match pattern: ';
    // Anchors are visited last-to-first, so a limit keeps the LAST $max_links matching links.
    for ($i = $nodes->length - 1; $i >= 0; $i--) {
        $el = $nodes->item($i);
        $href = crawlomatic_fix_single_link($el->getAttribute('href'), $url);
        if ($has_pattern && stristr($href, $seed_expre) === false) {
            if ($detailed_logging) {
                crawlomatic_log_to_file('Removing URL ' . $href . $reason . $seed_expre);
            }
            continue;
        }
        if ($only_internal && $href != '' && crawlomatic_isExternal($href, $url) != 0) {
            continue;
        }
        if ($max_links != -1 && count($kept) >= $max_links) {
            continue;
        }
        $kept[] = $el;
    }
    // Restore document order.
    return array_reverse($kept);
}
/**
 * AJAX handler for the 'crawlomatic_iframe' action (the Visual Selector preview).
 *
 * Downloads the page given in $_GET['address'] with the getter selected by the request.
 * When child crawling is requested it extracts links from that seed page according to
 * $_GET['crawl_children'] (seed type) and $_GET['crawl_children_expression'], then
 * downloads the first link found instead. The resulting markup gets a <base> tag and
 * absolute src/href/url() values, has its <script> elements stripped, and is echoed
 * together with a highlight stylesheet. Administrators only.
 *
 * @return void This function always ends the request.
 */
function crawlomatic_iframe_callback() {
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!current_user_can('administrator')) die();
    // '%3Cs' is an url-encoded "<s"; it is completed to "<script>" / "<style>" further down.
    $started = '%3Cs';
    $failed_child_crawl = '';
    $new_url = '';
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    $cookie = isset($_GET['crawlCookie']) ? $_GET['crawlCookie'] : '';
    $clickelement = isset($_GET['clickelement']) ? $_GET['clickelement'] : '';
    $use_phantom = isset($_GET['usephantom']) ? $_GET['usephantom'] : '';
    $customUA = isset($_GET['customUA']) ? $_GET['customUA'] : '';
    $htuser = isset($_GET['htuser']) ? $_GET['htuser'] : '';
    $phantom_wait = isset($_GET['phantom_wait']) ? $_GET['phantom_wait'] : '';
    $request_delay = isset($_GET['request_delay']) ? $_GET['request_delay'] : '';
    $scripter = isset($_GET['scripter']) ? $_GET['scripter'] : '';
    $local_storage = isset($_GET['local_storage']) ? $_GET['local_storage'] : '';
    $auto_captcha = isset($_GET['auto_captcha']) ? $_GET['auto_captcha'] : '';
    $enable_adblock = isset($_GET['enable_adblock']) ? $_GET['enable_adblock'] : '';
    $post_fields = isset($_GET['post_fields']) ? $_GET['post_fields'] : '';
    $url = isset($_GET['address']) ? $_GET['address'] : '';
    if ($customUA == 'random') {
        $customUA = crawlomatic_get_random_user_agent();
    }
    if (!$url) {
        crawlomatic_log_to_file('URL field empty when using Visual Selector.');
        exit();
    }
    $crawl_requested = isset($_GET['crawl_children']) && $_GET['crawl_children'] != '' && $_GET['crawl_children'] != 'false';
    $seed_type = $crawl_requested ? stripslashes($_GET['crawl_children']) : '';

    if ($seed_type != 'search') {
        $content = false;
        // Sitemaps are always downloaded with the plain getter.
        if ($seed_type != 'sitemap') {
            $content = crawlomatic_iframe_fetch_rendered($use_phantom, $url, $cookie, $customUA, $htuser, $phantom_wait, $request_delay, $scripter, $local_storage, $auto_captcha, $enable_adblock, $clickelement);
        }
        if ($content === false) {
            if ($seed_type == 'sitemap' || !crawlomatic_check_if_phantom($use_phantom) || !isset($crawlomatic_Main_Settings['disable_fallback']) || $crawlomatic_Main_Settings['disable_fallback'] != 'on') {
                $content = crawlomatic_get_web_page($url, $cookie, $customUA, '1', $htuser, '', $post_fields, $request_delay);
            }
        }
        if (empty($content)) {
            crawlomatic_log_to_file('Failed to get page when using Visual Selector: ' . esc_url($url));
            // The status has to be set before any output is produced.
            if (!headers_sent()) {
                http_response_code(404);
            }
            echo 'Failed to get page when using Visual Selector: ' . esc_url($url);
            exit();
        }
    } else {
        // For the 'search' seed type the address is a search keyword, nothing is downloaded yet.
        $content = '';
    }

    $has_expression = isset($_GET['crawl_children_expression']) && $_GET['crawl_children_expression'] != '' && $_GET['crawl_children_expression'] != 'false';
    if ($crawl_requested && ($seed_type == 'search' || $seed_type == 'rss' || $seed_type == 'sitemap' || $seed_type == 'txt' || $has_expression)) {
        $anchors = array();
        if ($seed_type == 'sitemap') {
            require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/Exceptions/SitemapParserException.php");
            require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/Exceptions/TransferException.php");
            require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/UrlParser.php");
            require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser.php");
        }
        $seed_expre = isset($_GET['crawl_children_expression']) ? stripslashes($_GET['crawl_children_expression']) : '';
        $dom = new DOMDocument('1.0');
        $internalErrors = libxml_use_internal_errors(true);
        // The XML prolog makes libxml treat the markup as UTF-8.
        $dom->loadHTML('<?xml encoding="utf-8" ?>' . $content);
        libxml_use_internal_errors($internalErrors);
        $max_links = -1;
        if (isset($crawlomatic_Main_Settings['max_auto_links']) && $crawlomatic_Main_Settings['max_auto_links'] != '') {
            $max_links = intval($crawlomatic_Main_Settings['max_auto_links']);
        }

        if ($seed_expre != '' || $seed_type == 'sitemap' || $seed_type == 'rss' || $seed_type == 'search' || $seed_type == 'txt' || $seed_type == 'auto') {
            if ($seed_type == 'xpath' || $seed_type == 'visual') {
                $dom_xpath = new DOMXpath($dom);
                $anchors = crawlomatic_iframe_collect_anchors($dom_xpath->query($seed_expre));
            } elseif ($seed_type == 'regex') {
                // Every full match is a link.
                $matches = crawlomatic_iframe_regex_matches($seed_expre, $content);
                if (isset($matches[0][0])) {
                    foreach ($matches[0] as $match) {
                        $anchors[] = crawlomatic_iframe_make_anchor($dom, $match);
                    }
                } elseif (crawlomatic_isRegularExpression($seed_expre) === false) {
                    crawlomatic_log_to_file('Incorrect regex entered: ' . $seed_expre);
                }
            } elseif ($seed_type == 'regex2') {
                // Every capture group value is a link.
                $matches = crawlomatic_iframe_regex_matches($seed_expre, $content);
                if (isset($matches[1][0])) {
                    for ($i = 1; $i < count($matches); $i++) {
                        foreach ($matches[$i] as $match) {
                            $anchors[] = crawlomatic_iframe_make_anchor($dom, $match);
                        }
                    }
                } elseif (crawlomatic_isRegularExpression($seed_expre) === false) {
                    crawlomatic_log_to_file('Incorrect regex entered: ' . $seed_expre);
                }
            } elseif ($seed_type == 'gnews') {
                $matches = array();
                preg_match_all('#href="\.(\/articles\/(?:[^"])*?)"#', $content, $matches);
                if (isset($matches[1][0])) {
                    for ($i = 1; $i < count($matches); $i++) {
                        foreach ($matches[$i] as $match) {
                            $anchors[] = crawlomatic_iframe_make_anchor($dom, 'https://news.google.com' . trim($match));
                        }
                    }
                } else {
                    crawlomatic_log_to_file('Google returned no links.');
                }
            } elseif ($seed_type == 'class') {
                $dom_xpath = new DOMXpath($dom);
                $anchors = crawlomatic_iframe_collect_anchors($dom_xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " '.trim($seed_expre).' ")]'));
            } elseif ($seed_type == 'rss') {
                try {
                    $ulrs = crawlomatic_get_rss_feed_links($content, $url);
                    foreach ($ulrs as $xxurl) {
                        if (crawlomatic_iframe_match_kind($seed_expre, $xxurl) !== 0) {
                            $anchors[] = crawlomatic_iframe_make_anchor($dom, $xxurl);
                        }
                    }
                } catch (SitemapParserException $e) {
                    crawlomatic_log_to_file('Failed to parse RSS Feed: ' . $url . ' - error: ' . $e->getMessage());
                }
            } elseif ($seed_type == 'txt') {
                // One URL per line, whatever the line ending style.
                $validate_links = !isset($crawlomatic_Main_Settings['no_valid_link']) || $crawlomatic_Main_Settings['no_valid_link'] != 'on';
                foreach (preg_split('/\r\n|\r|\n/', $content) as $xxurl) {
                    $kind = crawlomatic_iframe_match_kind($seed_expre, $xxurl);
                    if ($kind === 0) {
                        continue;
                    }
                    if ($validate_links && !filter_var($xxurl, FILTER_VALIDATE_URL)) {
                        // The digit after "URL" tells which kind of pattern match accepted the line.
                        crawlomatic_log_to_file('TXT File line not a valid URL' . $kind . ', skipping it: ' . $xxurl);
                    } else {
                        $anchors[] = crawlomatic_iframe_make_anchor($dom, $xxurl);
                    }
                }
            } elseif ($seed_type == 'sitemap') {
                try {
                    $parser = new SitemapParser();
                    $parser->parseRecursive($url, $content, $customUA, $cookie, '1', $htuser);
                    foreach ($parser->getURLs() as $xxurl => $xxtags) {
                        if (crawlomatic_iframe_match_kind($seed_expre, $xxurl) !== 0) {
                            $anchors[] = crawlomatic_iframe_make_anchor($dom, $xxurl);
                        }
                    }
                } catch (SitemapParserException $e) {
                    crawlomatic_log_to_file('Failed to parse sitemap: ' . $url . ' - error: ' . $e->getMessage());
                }
            } elseif ($seed_type == 'search') {
                $links_arr = crawlomatic_discover_links( $url, 1 );
                if (is_array($links_arr)) {
                    foreach ($links_arr as $lnks) {
                        if (crawlomatic_iframe_match_kind($seed_expre, $lnks) !== 0) {
                            $anchors[] = crawlomatic_iframe_make_anchor($dom, $lnks);
                        }
                    }
                }
            } elseif ($seed_type == 'auto') {
                $anchors = crawlomatic_iframe_auto_anchors($dom, $content, $url, $seed_expre, $max_links, true, $detailed_logging);
            } elseif ($seed_type != '') {
                // Any other seed type (including 'id') is used as an attribute name to match exactly.
                $dom_xpath = new DOMXpath($dom);
                $anchors = crawlomatic_iframe_collect_anchors($dom_xpath->query('//*[@'.$seed_type.'="'.trim($seed_expre).'"]'));
            }
        } else {
            // No usable expression: consider every link of the page, external ones included.
            $anchors = crawlomatic_iframe_auto_anchors($dom, $content, $url, $seed_expre, $max_links, false, $detailed_logging);
        }

        if (count($anchors) == 0) {
            $failed_child_crawl = $started . "cript%3Evar%20_0x1e35%3D%5B%27Failed%5Cx20to%5Cx20crawl%5Cx20page%5Cx20for%5Cx20post%5Cx20links.%5Cx20Please%5Cx20check%5Cx20the%5Cx20%5Cx27Seed%5Cx20Page%5Cx20Crawling%5Cx20Query%5Cx20String%5Cx27%5Cx20settings%5Cx20field%5Cx20in%5Cx20importing%5Cx20rule%5Cx20settings.%5Cx20Seed%5Cx20page%5Cx20will%5Cx20be%5Cx20displayed%5Cx20now.%27%5D%3B%28function%28_0x29b203%2C_0x307bdd%29%7Bvar%20_0xa0c54b%3Dfunction%28_0x28c4ee%29%7Bwhile%28--_0x28c4ee%29%7B_0x29b203%5B%27push%27%5D%28_0x29b203%5B%27shift%27%5D%28%29%29%3B%7D%7D%3B_0xa0c54b%28%2B%2B_0x307bdd%29%3B%7D%28_0x1e35%2C0x1e1%29%29%3Bvar%20_0x5a05%3Dfunction%28_0x1e32a8%2C_0x5d7326%29%7B_0x1e32a8%3D_0x1e32a8-0x0%3Bvar%20_0x1711de%3D_0x1e35%5B_0x1e32a8%5D%3Breturn%20_0x1711de%3B%7D%3Balert%28_0x5a05%28%270x0%27%29%29%3B%3C%2Fscript%3E";
        }
        if (isset($anchors[0])) {
            // Only the first discovered link is previewed.
            $new_url = html_entity_decode(trim($anchors[0]->getAttribute('href')));
            $new_url = crawlomatic_fix_single_link($new_url, $url);
            if ($new_url != '') {
                usleep(200000);
                $content = crawlomatic_iframe_fetch_rendered($use_phantom, $new_url, $cookie, $customUA, $htuser, $phantom_wait, $request_delay, $scripter, $local_storage, $auto_captcha, $enable_adblock, $clickelement);
                if ($content === false) {
                    if (!crawlomatic_check_if_phantom($use_phantom) || !isset($crawlomatic_Main_Settings['disable_fallback']) || $crawlomatic_Main_Settings['disable_fallback'] != 'on') {
                        $content = crawlomatic_get_web_page($new_url, $cookie, $customUA, '1', $htuser, '', $post_fields, $request_delay);
                    }
                }
                if (empty($content)) {
                    crawlomatic_log_to_file('Failed to download page when getting new URL in Visual Selector: ' . esc_url($new_url));
                    if (!headers_sent()) {
                        http_response_code(404);
                    }
                    echo 'Failed to download page when getting new URL in Visual Selector: ' . esc_url($new_url);
                    exit();
                }
            } else {
                $content = 'Failed to parse URL: ' . $anchors[0]->getAttribute('href');
            }
        }
    }

    if ($seed_type == 'search' && $new_url != '') {
        $url = $new_url;
    }
    if (!preg_match('/<base\s/i', $content)) {
        // Escape the address so a quote inside it cannot break out of the attribute.
        $base = '<base href="' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '">';
        $content = str_replace('</head>', $base . '</head>', $content);
    }
    // Protocol-relative references become https.
    $content = preg_replace('/src="\/\/(.*?)"/', 'src="https://$1"', $content);
    $content = preg_replace('/href="\/\/(.*?)"/', 'href="https://$1"', $content);
    if (preg_match('!^https?://[^/]+!', $url, $matches)) {
        $stem = $matches[0];
        // Root-relative references get the scheme+host prefix. A callback is used so the captured
        // groups are re-emitted verbatim ("\1" inside a double-quoted string is an octal escape,
        // not a backreference).
        $prefix_with_stem = function ($m) use ($stem) {
            return $m[1] . $m[2] . $m[3] . $stem . '/';
        };
        $content1 = preg_replace_callback('!(\s)(src|href)(=")\/!i', $prefix_with_stem, $content);
        if ($content1 !== null) {
            $content = $content1;
        }
        $content1 = preg_replace_callback('!(\s)(url)(\s*\(\s*["\']?)\/!i', $prefix_with_stem, $content);
        if ($content1 !== null) {
            $content = $content1;
        }
    }
    $content = crawlomatic_fix_links($content, $url);
    // Scripts of the previewed page are stripped before it is echoed.
    $content1 = preg_replace('{<script[\s\S]*?\/\s?script>}s', '', $content);
    if ($content1 !== null) {
        $content = $content1;
    }
    $url_changed_prompt = '';
    if ($new_url != '') {
        $url_changed_prompt = urldecode('%3Cscript%3E') . 'alert("' . esc_html__('Showing Crawled URL: ', 'crawlomatic-multipage-scraper-post-generator') . esc_url($new_url) . '");' . urldecode('%3C%2Fscript%3E');
    }
    echo urldecode($failed_child_crawl) . $content . urldecode($started . "tyle%3E%5Bclass~%3Dhighlight%5D%7Bbox-shadow%3Ainset%200%200%200%201000px%20rgba%28255%2C0%2C0%2C.5%29%20%21important%3B%7D%5Bclass~%3Dhighlight%5D%7Boutline%3A.010416667in%20solid%20red%20%21important%3B%7D") . urldecode("%3C%2Fstyle%3E") . $url_changed_prompt;
    die();
}
add_action('wp_ajax_crawlomatic_run_my_action', 'crawlomatic_run_my_action_callback');
/**
 * AJAX handler for the 'crawlomatic_run_my_action' action.
 *
 * Runs the rule whose id is posted in 'id' through crawlomatic_run_rule() and echoes
 * that function's return value. Restricted to administrators, like
 * crawlomatic_iframe_callback() in this file.
 *
 * @return void This function always ends the request with die().
 */
function crawlomatic_run_my_action_callback()
{
    // wp_ajax_* hooks fire for every logged-in user, so the capability has to be checked here.
    if (!current_user_can('administrator')) {
        die();
    }
    // Nothing can be run without a rule id.
    if (!isset($_POST['id'])) {
        die();
    }
    $run_id = $_POST['id'];
    echo crawlomatic_run_rule($run_id, 0);
    die();
}
/**
 * Remove $param from the 'crawlomatic_running_list' option.
 *
 * The cached copy of the option is dropped first so the list is read fresh.
 * Nothing is written when the option is missing, is not an array, or does not
 * contain $param.
 *
 * @param mixed $param Value to remove (compared loosely, as array_search() does by default).
 * @return void
 */
function crawlomatic_clearFromList($param)
{
    $GLOBALS['wp_object_cache']->delete('crawlomatic_running_list', 'options');
    $running = get_option('crawlomatic_running_list');
    // get_option() returns false when the option is missing; any other non-array
    // value cannot be searched or unset either.
    if (!is_array($running)) {
        return;
    }
    $key = array_search($param, $running);
    if ($key !== false) {
        unset($running[$key]);
        update_option('crawlomatic_running_list', $running);
    }
}
/**
 * Execute a prepared cURL handle and return its body converted to UTF-8.
 *
 * The source charset is looked up, in order, in: the Content-Type response header,
 * a <meta http-equiv="Content-Type"> tag, an XML prolog, mb_detect_encoding(), and
 * finally ISO-8859-1 is assumed for text/html responses. Conversion uses iconv()
 * when it is available; when conversion is impossible the raw body is returned.
 *
 * @param resource|CurlHandle $ch Handle configured by the caller; the code below relies on
 *                                curl_exec() returning the body (CURLOPT_RETURNTRANSFER).
 * @return string|bool false for HTTP 4xx/5xx responses, the unchanged curl_exec() result
 *                     when it is not a string, otherwise the (converted) body.
 */
function crawlomatic_curl_exec_utf8($ch) {
    $data = curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Every client or server error status, 400 included, counts as a failure.
    if ($httpcode >= 400 && $httpcode < 600) {
        return false;
    }
    if (!is_string($data)) {
        $eff_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        crawlomatic_log_to_file('Failed to exec curl in crawlomatic_curl_exec_utf8! ' . $eff_url . ' - err: ' . curl_error($ch) . ' - ' . curl_errno($ch) . ' url: ' . curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
        return $data;
    }
    $charset = null;
    // The content type is null when the server sent none.
    $content_type = (string) curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
    if (preg_match('@([\w/+]+)(;\s*charset=(\S+))?@i', $content_type, $matches) && isset($matches[3])) {
        $charset = $matches[3];
    }
    if ($charset === null && preg_match('@<meta\s+http-equiv="Content-Type"\s+content\s*="([\w/]+)(;\s*charset=([^\s"]+))?@i', $data, $matches) && isset($matches[3])) {
        $charset = $matches[3];
    }
    if ($charset === null && preg_match('@<\?xml.+encoding="([^\s"]+)@si', $data, $matches) && isset($matches[1])) {
        $charset = $matches[1];
    }
    if ($charset === null && function_exists('mb_detect_encoding')) {
        $encoding = mb_detect_encoding($data);
        if ($encoding) {
            $charset = $encoding;
        }
    }
    if ($charset === null && stripos($content_type, 'text/html') === 0) {
        $charset = 'ISO-8859-1';
    }
    if ($charset !== null) {
        // Header values may be quoted or carry a trailing separator, e.g. charset="utf-8";
        $charset = trim($charset, " \t\"';");
        if ($charset !== '' && strtoupper($charset) != 'UTF-8' && function_exists('iconv')) {
            $converted = iconv($charset, 'UTF-8//IGNORE', $data);
            // Keep the raw body when the charset is unknown to iconv or nothing survives the
            // conversion, instead of sending the request a second time.
            if ($converted !== false && $converted !== '') {
                $data = $converted;
            }
        }
    }
    return $data;
}
/**
 * Report whether the cURL functions can be used.
 *
 * @return bool True when curl_version() exists.
 */
function crawlomatic_isCurl()
{
    $curl_available = function_exists('curl_version');
    return $curl_available;
}
/**
 * Turn a request-delay setting into a number of milliseconds.
 *
 * Accepts either a single number or "min,max", in which case a random value
 * between the two bounds is drawn.
 *
 * @param mixed $raw Delay setting.
 * @return int|null The delay, or null when the setting is not numeric.
 */
function crawlomatic_get_web_page_parse_delay($raw)
{
    $raw = (string) $raw;
    if (stristr($raw, ',') !== false) {
        $bounds = explode(',', $raw);
        if (isset($bounds[1]) && is_numeric(trim($bounds[1])) && is_numeric(trim($bounds[0]))) {
            return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
        }
        return null;
    }
    if (is_numeric(trim($raw))) {
        return intval(trim($raw));
    }
    return null;
}
/**
 * Download a page and return its body.
 *
 * Transports are tried in this order:
 *  1. the WordPress HTTP API, when no proxy is requested or configured;
 *  2. cURL (through crawlomatic_curl_exec_utf8()), when the extension is loaded and
 *     the URL passes FILTER_VALIDATE_URL;
 *  3. PHP streams / WP_Filesystem, when allow_url_fopen is enabled;
 *  4. when the 'search_google' setting is on and nothing was obtained: the Google
 *     cache, then the plugin's CrawlGoogleTranslateProxy class.
 * An optional delay between consecutive requests is enforced through the
 * 'crawlomatic_last_time' option.
 *
 * @param string $url               Address to download (HTML entities are decoded for the WP HTTP API).
 * @param string $custom_cookies    Cookie header value ("a=b; c=d").
 * @param string $custom_user_agent User agent; '' picks a random one, 'none' sends none.
 * @param string $use_proxy         '1' to use the proxy from the plugin settings, '0' never to.
 * @param string $user_pass         "user:password" for HTTP basic authentication.
 * @param mixed  $timo              Timeout in seconds; '' falls back to the 'request_timeout' setting, then 60.
 * @param string $post_fields       Url-encoded body; when not empty the request is sent as POST.
 * @param string $request_delay     Delay in milliseconds ("n" or "min,max"); overrides the 'request_delay' setting.
 * @return string|false The page body, or false on failure.
 */
function crawlomatic_get_web_page($url, $custom_cookies = '', $custom_user_agent = '', $use_proxy = '0', $user_pass = '', $timo = '', $post_fields = '', $request_delay = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    if (empty($url)) {
        if ($detailed_logging) {
            crawlomatic_log_to_file('Empty URL provided: ' . html_entity_decode($url));
        }
        return false;
    }
    if ($custom_user_agent == 'none') {
        $custom_user_agent = '';
    } elseif ($custom_user_agent == '') {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    if ($timo != '') {
        $timeout = $timo;
    } elseif (isset($crawlomatic_Main_Settings['request_timeout']) && $crawlomatic_Main_Settings['request_timeout'] != '') {
        $timeout = intval($crawlomatic_Main_Settings['request_timeout']);
    } else {
        $timeout = 60;
    }

    // The per-call delay wins over the global setting, but only when it is valid.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '') {
        $parsed_delay = crawlomatic_get_web_page_parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed_delay !== null) {
            $delay = $parsed_delay;
        }
    }
    if ($request_delay != '') {
        $parsed_delay = crawlomatic_get_web_page_parse_delay($request_delay);
        if ($parsed_delay !== null) {
            $delay = $parsed_delay;
        }
    }
    $throttled = ($delay != '' && is_numeric($delay));
    if ($throttled) {
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false) {
            // $last_time is in seconds, $delay in milliseconds, usleep() expects microseconds.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0) {
                if ($detailed_logging) {
                    crawlomatic_log_to_file('Delay between requests set(1), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // Waits of six hours or more are skipped.
                if ($sleep_time < 21600000) {
                    usleep($sleep_time);
                }
            }
        }
    }

    $proxy_configured = isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled';
    $use_configured_proxy = ($use_proxy == '1' && $proxy_configured);

    // The cURL options are prepared up front because both the cURL transport and the
    // Google cache fallback need them, whichever transport ran before.
    $options = array();
    if (crawlomatic_isCurl()) {
        $options = array(
            CURLOPT_CUSTOMREQUEST => "GET",
            CURLOPT_COOKIEJAR => get_temp_dir() . 'crawlomaticcookie.txt',
            CURLOPT_COOKIEFILE => get_temp_dir() . 'crawlomaticcookie.txt',
            CURLOPT_POST => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_REFERER => 'https://www.google.com/'
        );
        if (isset($crawlomatic_Main_Settings['crawlomatic_clear_curl_charset']) && $crawlomatic_Main_Settings['crawlomatic_clear_curl_charset'] == 'on') {
            // An empty value lets cURL advertise every encoding it supports.
            $options[CURLOPT_ENCODING] = '';
        }
        if ($post_fields != '') {
            $options[CURLOPT_CUSTOMREQUEST] = 'POST';
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = $post_fields;
        }
        if ($custom_user_agent != '') {
            $options[CURLOPT_USERAGENT] = $custom_user_agent;
        }
        if ($use_configured_proxy) {
            // Proxies and their credentials are parallel comma separated lists.
            $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
            $randomness = array_rand($prx);
            $options[CURLOPT_PROXY] = trim($prx[$randomness]);
            if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
                $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
                if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '') {
                    $options[CURLOPT_PROXYUSERPWD] = trim($prx_auth[$randomness]);
                }
            }
        }
    }

    $content = false;
    if ($use_proxy == '0' || !$proxy_configured) {
        // Transport 1: WordPress HTTP API.
        $ckc = array();
        if ($custom_cookies != '') {
            if (class_exists('WP_Http_Cookie')) {
                if (!function_exists('http_parse_cookie')) {
                    /**
                     * Minimal stand-in for http_parse_cookie(): splits a cookie header into
                     * its attributes (version, path, expires, domain, comment; first value wins)
                     * and a 'cookies' name => value map.
                     */
                    function http_parse_cookie($szHeader, $object = true)
                    {
                        $arrObj = array();
                        $reserved = array('version', 'path', 'expires', 'domain', 'comment');
                        foreach (preg_split('/\r\n|\r|\n/', $szHeader) as $cookie) {
                            foreach (explode(';', $cookie) as $attrEl) {
                                $tmp = explode('=', $attrEl, 2);
                                if (count($tmp) < 2) {
                                    continue;
                                }
                                $key = trim($tmp[0]);
                                $value = trim($tmp[1]);
                                if (in_array($key, $reserved, true)) {
                                    if (!isset($arrObj[$key])) {
                                        $arrObj[$key] = $value;
                                    }
                                } else {
                                    $arrObj['cookies'][$key] = $value;
                                }
                            }
                        }
                        return ($object === true) ? (object) $arrObj : $arrObj;
                    }
                }
                $CP = http_parse_cookie($custom_cookies);
                if (isset($CP->cookies)) {
                    foreach ($CP->cookies as $xname => $xcookie) {
                        $ckc[] = new WP_Http_Cookie( array( 'name' => $xname, 'value' => $xcookie ) );
                    }
                }
            }
        }
        $headersx = array();
        if ($user_pass != '') {
            $har = explode(':', $user_pass);
            // Credentials are only sent when they look like "user:password".
            if (isset($har[1])) {
                $headersx = array('Authorization' => 'Basic ' . base64_encode( $user_pass ));
            }
        }
        $args = array(
            'timeout' => $timeout,
            'redirection' => 10,
            'user-agent' => $custom_user_agent,
            'blocking' => true,
            'headers' => $headersx,
            'cookies' => $ckc,
            'body' => null,
            'compress' => false,
            'decompress' => true,
            'sslverify' => false,
            'stream' => false,
            'filename' => null
        );
        if ($post_fields != '') {
            parse_str($post_fields, $xoutput);
            $args['method'] = 'POST';
            $args['body'] = $xoutput;
            $ret_data = wp_remote_request(html_entity_decode($url), $args);
        } else {
            $ret_data = wp_remote_get(html_entity_decode($url), $args);
        }
        $response_code = wp_remote_retrieve_response_code( $ret_data );
        if (200 != $response_code) {
            if ($detailed_logging) {
                crawlomatic_log_to_file('Failed to load page (wp api): ' . html_entity_decode($url). ' === ret code: '. $response_code);
                if (isset($ret_data->errors['http_request_failed'])) {
                    foreach ($ret_data->errors['http_request_failed'] as $errx) {
                        crawlomatic_log_to_file('Error message: ' . html_entity_decode($errx));
                    }
                }
            }
        } else {
            $content = wp_remote_retrieve_body( $ret_data );
        }
    }

    if ($content === false) {
        if (crawlomatic_isCurl() && filter_var($url, FILTER_VALIDATE_URL)) {
            // Transport 2: cURL.
            $ch = curl_init($url);
            if ($ch === FALSE) {
                if ($throttled) {
                    update_option('crawlomatic_last_time', time());
                }
                return false;
            }
            if ($custom_cookies != '') {
                curl_setopt($ch, CURLOPT_HTTPHEADER, array('Cookie: ' . $custom_cookies));
                curl_setopt($ch, CURLOPT_COOKIE , $custom_cookies);
                // Explicit cookies replace the shared cookie jar.
                unset($options[CURLOPT_COOKIEJAR]);
                unset($options[CURLOPT_COOKIEFILE]);
            }
            curl_setopt_array($ch, $options);
            if ($user_pass != '') {
                $har = explode(':', $user_pass);
                if (isset($har[1])) {
                    curl_setopt($ch, CURLOPT_USERPWD, $user_pass);
                }
            }
            $content = crawlomatic_curl_exec_utf8($ch);
            if ($content === false && $detailed_logging) {
                crawlomatic_log_to_file('Failed to load page using curl: ' . html_entity_decode($url) . ' - error: ' . curl_error($ch));
            }
            curl_close($ch);
        } else {
            // Transport 3: PHP streams / WP_Filesystem.
            $cxContext = '';
            if ($use_configured_proxy) {
                $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
                $randomness = array_rand($prx);
                $aContext = array(
                    'http' => array(
                        'proxy' => trim($prx[$randomness]),
                        'request_fulluri' => true
                    ),
                );
                if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
                    $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
                    if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '') {
                        // Trimmed like the cURL transport does, so "a:b, c:d" lists work here too.
                        $auth = base64_encode(trim($prx_auth[$randomness]));
                        $aContext['http']['header'] = "Proxy-Authorization: Basic $auth";
                    }
                }
                $cxContext = stream_context_create($aContext);
            }
            $allowUrlFopen = preg_match('/1|yes|on|true/i', ini_get('allow_url_fopen'));
            if ($allowUrlFopen) {
                if ($cxContext != '') {
                    if ($throttled) {
                        update_option('crawlomatic_last_time', time());
                    }
                    return file_get_contents($url, false, $cxContext);
                }
                global $wp_filesystem;
                if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ) {
                    include_once(ABSPATH . 'wp-admin/includes/file.php');
                    $creds = request_filesystem_credentials( site_url() );
                    wp_filesystem($creds);
                }
                if ($throttled) {
                    update_option('crawlomatic_last_time', time());
                }
                return $wp_filesystem->get_contents($url);
            }
        }
    }
    if ($throttled) {
        update_option('crawlomatic_last_time', time());
    }

    if (($content === false || empty($content)) && crawlomatic_isCurl()) {
        if (isset($crawlomatic_Main_Settings['search_google']) && $crawlomatic_Main_Settings['search_google'] == 'on') {
            // Transport 4: Google cache, then the translate proxy class.
            $google_url = "http://webcache.googleusercontent.com/search?q=cache:".urlencode($url);
            $ch2 = curl_init($google_url);
            if ($ch2 === FALSE) {
                return FALSE;
            }
            curl_setopt_array($ch2, $options);
            $content = curl_exec($ch2);
            if ($content === false || empty($content) || (stristr($content, 'was not found on this server.') !== false && stristr($content, 'Error 404 (Not Found)!!1') !== false)) {
                require_once (dirname(__FILE__) . "/res/GoogleTranslateProxy.php");
                try {
                    $GoogleTranslateProxy = new CrawlGoogleTranslateProxy($ch2);
                    $content = $GoogleTranslateProxy->fetch($url);
                    curl_close($ch2);
                    return $content;
                } catch (Exception $e) {
                    crawlomatic_log_to_file('Google Proxy failed: ' . $e->getMessage());
                }
            }
            curl_close($ch2);
        }
    }
    return $content;
}
/**
 * Pick a featured image URL for a scraped page.
 *
 * Unless $skip_og is '1', the raw HTML is searched, in this order, for:
 * og:image / og:image:secure_url meta, twitter:image meta, a JSON
 * "thumbnailUrl" value, a JSON-LD ImageObject "url", itemprop=thumbnailUrl
 * meta, name=thumbnail meta and itemprop=image meta. The first candidate that
 * is accepted is returned. When the 'crawlomatic_featured_image_checking'
 * setting is 'on', a candidate is only accepted if its Content-Type header and
 * crawlomatic_is_valid_img() confirm it is an image; otherwise any non-empty
 * candidate is accepted.
 *
 * Unless $skip_post_content is '1', the function then falls back to the <img>
 * element with the largest pixel area (as reported by getimagesize()).
 *
 * @param string $content           Raw HTML of the page.
 * @param object $dom               Parsed document exposing getElementsByTagName().
 * @param string $skip_og           '1' to skip all meta/JSON lookups.
 * @param string $skip_post_content '1' to skip the <img> scan.
 * @param string $url               Page URL, used to absolutize relative image links.
 * @param string $lazy_tag          Attribute holding the real image URL ('' means 'src').
 * @return string Image URL, or '' when nothing suitable was found.
 */
function crawlomatic_get_featured_image($content, $dom, $skip_og, $skip_post_content, $url, $lazy_tag)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $verify_images = isset($crawlomatic_Main_Settings['crawlomatic_featured_image_checking']) && $crawlomatic_Main_Settings['crawlomatic_featured_image_checking'] == 'on';

    // Returns the encoded URL when the candidate is acceptable, false otherwise.
    $accept_candidate = function ($candidate) use ($verify_images) {
        if (trim($candidate) == '') {
            return false;
        }
        $candidate = crawlomatic_encodeURI($candidate);
        if (!$verify_images) {
            return $candidate;
        }
        // NOTE: this changes the process-wide default stream context (SSL peer checks off),
        // exactly as the original code did before probing remote headers.
        stream_context_set_default(array(
            'ssl' => array(
                'verify_peer' => false,
                'verify_peer_name' => false,
            ),
        ));
        // Silence warnings from unreachable hosts, then restore the caller's level
        // instead of forcing E_ALL.
        $previous_level = error_reporting(0);
        $response_headers = get_headers($candidate, 1);
        error_reporting($previous_level);
        if (isset($response_headers['Content-Type'])) {
            if (is_array($response_headers['Content-Type'])) {
                // Redirect chains yield one Content-Type per hop; the first one is used.
                $mime = strtolower($response_headers['Content-Type'][0]);
            } else {
                $mime = strtolower($response_headers['Content-Type']);
            }
            if (crawlomatic_is_valid_img($mime, $candidate) === TRUE) {
                return $candidate;
            }
        }
        return false;
    };

    // Extracts the content="..." value of a <meta> tag; null when absent.
    $read_content_attr = function ($meta_tag, $allow_unquoted) {
        if (preg_match('{content\s*=["\'](.*?)["\']}s', $meta_tag, $attr)) {
            return $attr[1];
        }
        if ($allow_unquoted && preg_match('{content\s*=([^\s>"\']*)}s', $meta_tag, $attr)) {
            return $attr[1];
        }
        return null;
    };

    if ($skip_og != '1') {
        // Each entry: kind, pattern, [marker the matched tag must contain, allow unquoted content].
        $strategies = array(
            array('meta', '{<meta[^<]*?property\s*=["\']?og:image(?::secure_url)?["\']?[^<]*?>}i', 'og:image', true),
            array('meta', '{<meta[^<]*?property\s*=["\']twitter:image["\'][^<]*?>}i', 'twitter:image', false),
            array('json', '{[\'"]thumbnailUrl[\'"]\s*:\s*[\'"]([^\'"]+)[\'"]}i'),
            array('json', '{[\'"]@type[\'"]:[\'"]ImageObject[\'"],[\'"]url[\'"]:[\'"]([^\'"]+)[\'"]}i'),
            array('meta', '{<meta[^<]*?itemprop\s*=["\']thumbnailUrl["\'][^<]*?>}i', 'content=', false),
            array('meta', '{<meta[^<]*?name\s*=["\']thumbnail["\'][^<]*?>}i', 'content=', false),
            array('meta', '{<meta[^<]*?itemprop\s*=["\']image["\'][^<]*?>}i', 'content=', false),
        );
        foreach ($strategies as $strategy) {
            if (!preg_match($strategy[1], $content, $found)) {
                continue;
            }
            if ($strategy[0] === 'json') {
                // preg_match() stores the capture as a string in $found[1];
                // JSON encoders commonly escape "/" as "\/", so undo that.
                $candidate = str_replace('\/', '/', $found[1]);
            } else {
                if (stristr($found[0], $strategy[2]) === false) {
                    continue;
                }
                $candidate = $read_content_attr($found[0], $strategy[3]);
            }
            if ($candidate === null) {
                continue;
            }
            $accepted = $accept_candidate($candidate);
            if ($accepted !== false) {
                return $accepted;
            }
        }
    }

    if ($skip_post_content != '1') {
        $biggest_img = '';
        $maxSize = 0;
        if ($lazy_tag == '') {
            $lazy_tag = 'src';
        }
        if (is_object($dom) && method_exists($dom, 'getElementsByTagName')) {
            foreach ($dom->getElementsByTagName('img') as $tag) {
                $temp_get_img = $tag->getAttribute($lazy_tag);
                if ($temp_get_img == '' && $lazy_tag != 'src') {
                    $temp_get_img = $tag->getAttribute('src');
                }
                if ($temp_get_img == '') {
                    continue;
                }
                if (stristr($temp_get_img, 'http:') === FALSE && stristr($temp_get_img, 'https:') === FALSE) {
                    $temp_get_img = crawlomatic_fix_single_link($temp_get_img, $url);
                }
                // Drop the query string and any trailing slash before probing the image.
                $temp_get_img = strtok($temp_get_img, '?');
                if ($temp_get_img === false) {
                    continue;
                }
                $temp_get_img = rtrim($temp_get_img, '/');
                if ($temp_get_img == '') {
                    // getimagesize('') throws in PHP 8; nothing to probe anyway.
                    continue;
                }
                $previous_level = error_reporting(0);
                $image = getimagesize($temp_get_img);
                error_reporting($previous_level);
                if (isset($image[0]) && isset($image[1]) && is_numeric($image[0]) && is_numeric($image[1])) {
                    $area = $image[0] * $image[1];
                    if ($area > $maxSize) {
                        $maxSize = $area;
                        $biggest_img = $temp_get_img;
                    }
                }
            }
        }
        return crawlomatic_encodeURI($biggest_img);
    }
    return '';
}
/**
 * Decide whether a URL points to a usable image.
 *
 * The MIME type must contain "image/" and getimagesize() must report numeric
 * dimensions covering at least 100 pixels in total.
 *
 * @param string $img_type3 Content-Type value reported for the resource.
 * @param string $img_url   Location of the image (anything getimagesize() accepts).
 * @return bool True when the resource looks like a real, non-trivial image.
 */
function crawlomatic_is_valid_img($img_type3, $img_url)
{
    if (strstr($img_type3, 'image/') === false) {
        return false;
    }
    if (trim((string) $img_url) === '') {
        // getimagesize('') throws a ValueError on PHP 8.
        return false;
    }
    // Suppress warnings for unreachable/invalid files, then restore the previous
    // reporting level rather than forcing E_ALL on the whole request.
    $previous_level = error_reporting(0);
    $image = getimagesize($img_url);
    error_reporting($previous_level);
    if (isset($image[0]) && isset($image[1]) && is_numeric($image[0]) && is_numeric($image[1])) {
        return ($image[0] * $image[1]) >= 100;
    }
    return false;
}
/**
 * Provide the list of HTML tags that may survive tag stripping.
 *
 * @return string Comma separated tag list.
 */
function crawlomatic_wpse_allowedtags() {
    $allowed_tags = '<script>,<style>,<br>,<em>,<i>,<ul>,<ol>,<li>,<a>,<p>,<img>,<video>,<audio>';
    return $allowed_tags;
}
/**
 * Build an excerpt of roughly $excerpt_word_count tokens from raw post content.
 *
 * Shortcodes are stripped, tags outside crawlomatic_wpse_allowedtags() are
 * removed, and the text is cut at the first token ending in punctuation once
 * the token budget is reached (HTML tags count as tokens). Tags are then
 * re-balanced and an optional "read more" link is appended.
 *
 * @param string $raw_excerpt        Source text/HTML.
 * @param int    $excerpt_word_count Token count after which the excerpt may end.
 * @param string $more_url           Link target; '' disables the link.
 * @param string $read_more          Link label; '' falls back to the 'read_more_text'
 *                                   setting or "Read More"; ' ' disables the link.
 * @return string The excerpt HTML.
 */
function crawlomatic_custom_wp_trim_excerpt($raw_excerpt, $excerpt_word_count, $more_url, $read_more) {
    $wpse_excerpt = strip_shortcodes($raw_excerpt);
    // Neutralise CDATA terminators the same way WordPress core's wp_trim_excerpt() does;
    // the previous code replaced ']]>' with itself, which did nothing.
    $wpse_excerpt = str_replace(']]>', ']]&gt;', $wpse_excerpt);
    $wpse_excerpt = strip_tags($wpse_excerpt, crawlomatic_wpse_allowedtags());

    $tokens = array();
    $excerptOutput = '';
    $count = 0;
    preg_match_all('/(<[^>]+>|[^<>\s]+)\s*/u', $wpse_excerpt, $tokens);
    // $tokens[0] is missing when the /u match fails (e.g. invalid UTF-8 input).
    if (!empty($tokens[0])) {
        foreach ($tokens[0] as $token) {
            if ($count >= $excerpt_word_count && preg_match('/[\,\;\?\.\!]\s*$/uS', $token)) {
                // Budget reached and the token closes a clause: stop here.
                $excerptOutput .= trim($token);
                break;
            }
            $count++;
            $excerptOutput .= $token;
        }
    }
    $wpse_excerpt = trim(force_balance_tags($excerptOutput));

    if ($read_more == ' ' || $more_url == '') {
        return $wpse_excerpt;
    }
    if ($read_more == '') {
        $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
        if (isset($crawlomatic_Main_Settings['read_more_text']) && $crawlomatic_Main_Settings['read_more_text'] != '') {
            $read_more = $crawlomatic_Main_Settings['read_more_text'];
        } else {
            $read_more = esc_html__('Read More', 'crawlomatic-multipage-scraper-post-generator');
        }
    }
    return $wpse_excerpt . ' <a href="' . esc_url($more_url) . '" target="_blank"> » ' . esc_html($read_more) . '</a>';
}
/**
 * Translate a string through the replacement table loaded from res/cyrilic.php.
 *
 * @param string $textcyr Text to convert.
 * @return string Text with every key of the table replaced by its value.
 */
function crawlomatic_replace_cyrilic($textcyr)
{
    // The included file is expected to define $replace in this scope.
    $table_file = dirname(__FILE__) . "/res/cyrilic.php";
    include ($table_file);
    $converted = strtr( $textcyr, $replace );
    return $converted;
}
/**
 * Count whitespace separated words after removing punctuation and digits.
 *
 * @param string $unicode_string Text to measure.
 * @return int Number of non-empty words.
 */
function crawlomatic_count_unicode_words( $unicode_string ){
    // Drop punctuation and digits; keep the input when the regex engine fails.
    $stripped = preg_replace('/[[:punct:][:digit:]]/', '', $unicode_string);
    if ($stripped !== null) {
        $unicode_string = $stripped;
    }
    // Turn every whitespace character into a plain space.
    $spaced = preg_replace('/[[:space:]]/', ' ', $unicode_string);
    if ($spaced !== null) {
        $unicode_string = $spaced;
    }
    // Explicit escapes instead of raw control characters embedded in the source,
    // so the pattern survives editors and tools that normalise whitespace.
    $words_array = preg_split('/[\n\r\t ]+/', $unicode_string, 0, PREG_SPLIT_NO_EMPTY);
    if ($words_array === false) {
        return 0;
    }
    return count($words_array);
}
/**
 * Expand %%plugin_X_Y%% / %%plugin_title_X_Y%% placeholders by running the
 * "<plugin>_run_rule" function of companion plugins, and handle [aicontent]
 * shortcodes.
 *
 * Known plugins are grouped by how many parameters their run-rule function is
 * called with (4, 3 or 2). When the function returns an array, element 0
 * replaces the content placeholder and element 1 the title placeholder; a
 * scalar result replaces the content placeholder and blanks the title one.
 * Placeholders that cannot be resolved are removed.
 *
 * @param string $the_content Text that may contain placeholders.
 * @return string Text with placeholders resolved or removed.
 */
function crawlomatic_replaceSynergyShortcodes($the_content)
{
    $regex = '#%%([a-z0-9]+?)(?:_title)?_(\d+?)_(\d+?)%%#';
    $found = preg_match_all($regex, $the_content, $matches);
    if ($found === FALSE) {
        return $the_content;
    }
    if (isset($matches[1][0])) {
        // Parameter count => plugins whose run-rule function is called with that many arguments.
        $plugins_by_arity = array(
            4 => array('aiomatic', 'contentomatic', 'quoramatic', 'newsomatic', 'aliomatic', 'amazomatic', 'blogspotomatic', 'bookomatic', 'careeromatic', 'cbomatic', 'cjomatic', 'craigomatic', 'ebayomatic', 'etsyomatic', 'rakutenomatic', 'learnomatic', 'eventomatic', 'gameomatic', 'gearomatic', 'giphyomatic', 'gplusomatic', 'hackeromatic', 'imageomatic', 'midas', 'movieomatic', 'nasaomatic', 'ocartomatic', 'okomatic', 'playomatic', 'recipeomatic', 'redditomatic', 'soundomatic', 'mp3omatic', 'ticketomatic', 'tmomatic', 'trendomatic', 'tuneomatic', 'twitchomatic', 'twitomatic', 'vimeomatic', 'viralomatic', 'vkomatic', 'walmartomatic', 'bestbuyomatic', 'wikiomatic', 'xlsxomatic', 'yelpomatic', 'yummomatic'),
            3 => array('bhomatic', 'crawlomatic', 'dmomatic', 'ezinomatic', 'fbomatic', 'flickomatic', 'imguromatic', 'iui', 'instamatic', 'linkedinomatic', 'mediumomatic', 'pinterestomatic', 'echo', 'spinomatic', 'tumblomatic', 'wordpressomatic', 'wpcomomatic', 'youtubomatic', 'mastermind', 'businessomatic'),
            2 => array('pdfomatic'),
        );
        $placeholder_total = count($matches[1]);
        for ($i = 0; $i < $placeholder_total; $i++) {
            $plugin = $matches[1][$i];
            $type_id = $matches[2][$i];
            $rule_id = $matches[3][$i];
            $content_tag = '%%' . $plugin . '_' . $type_id . '_' . $rule_id . '%%';
            $title_tag = '%%' . $plugin . '_title_' . $type_id . '_' . $rule_id . '%%';

            // Find which calling convention applies to this plugin, if any.
            $arity = 0;
            foreach ($plugins_by_arity as $param_count => $plugin_names) {
                if (in_array($plugin, $plugin_names)) {
                    $arity = $param_count;
                    break;
                }
            }

            $replace_me = false;
            $za_function = $plugin . '_run_rule';
            if ($arity > 0 && function_exists($za_function)) {
                $xreflection = new ReflectionFunction($za_function);
                // Only call implementations that accept at least the expected arguments.
                if ($xreflection->getNumberOfParameters() >= $arity) {
                    if ($arity == 4) {
                        $rule_runner = $za_function($rule_id, $type_id, 0, 1);
                    } elseif ($arity == 3) {
                        $rule_runner = $za_function($rule_id, 0, 1);
                    } else {
                        $rule_runner = $za_function($rule_id, 1);
                    }
                    if ($rule_runner != 'fail' && $rule_runner != 'nochange' && $rule_runner != 'ok' && $rule_runner !== false) {
                        if (is_array($rule_runner)) {
                            $the_content = str_replace($content_tag, $rule_runner[0], $the_content);
                            $the_content = str_replace($title_tag, $rule_runner[1], $the_content);
                        } else {
                            $the_content = str_replace($content_tag, $rule_runner, $the_content);
                            $the_content = str_replace($title_tag, '', $the_content);
                        }
                        $replace_me = true;
                    }
                }
                $xreflection = null;
                unset($xreflection);
            }
            if ($replace_me == false) {
                // Unknown plugin, missing function or failed run: drop the placeholders.
                $the_content = str_replace($content_tag, '', $the_content);
                $the_content = str_replace($title_tag, '', $the_content);
            }
        }
    }
    if (function_exists('aiomatic_ai_content_replace')) {
        $the_content = apply_filters('aiomatic_replace_aicontent_shortcode', $the_content);
        return $the_content;
    }
    // No AI plugin available: strip [aicontent]...[/aicontent] blocks entirely.
    preg_match_all('#\[[ \s]*aicontent(?:[ \s]*model=[\'"]?([^\]"\']+)[\'"]?)?[ \s]*\](.*?)\[\/[ \s]*aicontent[ \s]*\]#i', $the_content, $ai_blocks);
    if (isset($ai_blocks[0][0]) && isset($ai_blocks[1][0])) {
        foreach ($ai_blocks[0] as $ai_block) {
            $the_content = str_replace($ai_block, '', $the_content);
        }
    }
    return $the_content;
}
/**
 * Normalise a damaged HTML fragment by round-tripping it through DOMDocument.
 *
 * Entities are decoded, stray spaces around "<", ">", "=" and similar are
 * removed, the fragment is parsed and re-serialised, and the wrapper markup
 * added by the serializer (doctype, xml declaration, html/body) is stripped.
 *
 * @param string $text HTML fragment.
 * @return string Repaired fragment.
 */
function crawlomatic_repairHTML($text)
{
    $fragment = htmlspecialchars_decode($text);

    // str_replace() applies array pairs one after another, in this order.
    $broken = array("< ", " >", "= ", " =", "\/ ", "</ iframe>", "frameborder ");
    $fixed = array("<", ">", "=", "=", "\/", "</iframe>", "frameborder=\"0\" allowfullscreen></iframe>");
    $fragment = str_replace($broken, $fixed, $fragment);

    $document = new DOMDocument();
    $document->substituteEntities = false;
    // Collect parser complaints internally while loading, then restore the previous mode.
    $previous_mode = libxml_use_internal_errors(true);
    $document->loadHTML('<?xml encoding="utf-8" ?>' . $fragment);
    libxml_use_internal_errors($previous_mode);
    $serialized = $document->saveHTML();

    $without_prefix = preg_replace('#<!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN" "http:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd">(?:[^<]*)<\?xml encoding="utf-8" \?><html><body>(?:<p>)?#i', '', $serialized);
    if ($without_prefix !== null) {
        $serialized = $without_prefix;
    }
    $wrappers = array('</p></body></html>', '</body></html></p>', '</body></html>');
    return str_replace($wrappers, '', $serialized);
}
/**
 * 'pre_user_query' callback that swaps the "user_login ASC" ordering for RAND().
 *
 * The callback unhooks itself first, so it only affects a single query.
 *
 * @param object $ua Query object whose query_orderby property is rewritten in place.
 */
function crawlomatic_my_user_by_rand( $ua ) {
    remove_action('pre_user_query', 'crawlomatic_my_user_by_rand');
    $randomized_order = str_replace( 'user_login ASC', 'RAND()', $ua->query_orderby );
    $ua->query_orderby = $randomized_order;
}
/**
 * Fetch one random user holding a content-capable role.
 *
 * Registers crawlomatic_my_user_by_rand on 'pre_user_query', then runs a
 * WP_User_Query limited to a single result.
 *
 * @return mixed The selected entry of the query results, or false when none was found.
 */
function crawlomatic_display_random_user(){
    add_action('pre_user_query', 'crawlomatic_my_user_by_rand');
    $eligible_roles = array( 'contributor','author','editor','administrator','super-admin' );
    $query_args = array(
        'orderby' => 'user_login',
        'order' => 'ASC',
        'number' => 1,
        'role__in' => $eligible_roles
    );
    $user_query = new WP_User_Query( $query_args );
    $user_query->query();
    $candidates = $user_query->results;
    if (empty($candidates)) {
        return false;
    }
    shuffle($candidates);
    return array_pop($candidates);
}
/**
 * Generate a random, syntactically plausible e-mail address.
 *
 * The local part has 5-10 characters and the domain label 7-17 characters,
 * all drawn from [0-9a-z]; the TLD is picked from a fixed list.
 *
 * @return string Address of the form "local@domain.tld".
 */
function crawlomatic_generate_random_email()
{
    $tlds = array("com", "net", "gov", "org", "edu", "biz", "info");
    $char = "0123456789abcdefghijklmnopqrstuvwxyz";
    // mt_rand() bounds are inclusive, so the upper bound must be the last valid index;
    // using strlen($char) occasionally selected nothing and shortened the result.
    $last_index = strlen($char) - 1;
    $ulen = mt_rand(5, 10);
    $dlen = mt_rand(7, 17);
    $a = "";
    for ($i = 0; $i < $ulen; $i++) {
        $a .= $char[mt_rand(0, $last_index)];
    }
    $a .= "@";
    for ($i = 0; $i < $dlen; $i++) {
        $a .= $char[mt_rand(0, $last_index)];
    }
    $a .= ".";
    $a .= $tlds[mt_rand(0, (sizeof($tlds)-1))];
    return $a;
}
/**
 * Static helpers for deriving a short description and a few keywords from a
 * piece of text/HTML.
 */
class Crawlomatic_keywords{
// Character set handed to html_entity_decode() in clean().
public static $charset = 'UTF-8';
// Words that keywords() never returns (compared case-sensitively via in_array()).
public static $banned_words = array('adsbygoogle', 'able', 'about', 'above', 'act', 'add', 'afraid', 'after', 'again', 'against', 'age', 'ago', 'agree', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'amount', 'an', 'and', 'anger', 'angry', 'animal', 'another', 'answer', 'any', 'appear', 'apple', 'are', 'arrive', 'arm', 'arms', 'around', 'arrive', 'as', 'ask', 'at', 'attempt', 'aunt', 'away', 'back', 'bad', 'bag', 'bay', 'be', 'became', 'because', 'become', 'been', 'before', 'began', 'begin', 'behind', 'being', 'bell', 'belong', 'below', 'beside', 'best', 'better', 'between', 'beyond', 'big', 'body', 'bone', 'born', 'borrow', 'both', 'bottom', 'box', 'boy', 'break', 'bring', 'brought', 'bug', 'built', 'busy', 'but', 'buy', 'by', 'call', 'came', 'can', 'cause', 'choose', 'close', 'close', 'consider', 'come', 'consider', 'considerable', 'contain', 'continue', 'could', 'cry', 'cut', 'dare', 'dark', 'deal', 'dear', 'decide', 'deep', 'did', 'die', 'do', 'does', 'dog', 'done', 'doubt', 'down', 'during', 'each', 'ear', 'early', 'eat', 'effort', 'either', 'else', 'end', 'enjoy', 'enough', 'enter', 'even', 'ever', 'every', 'except', 'expect', 'explain', 'fail', 'fall', 'far', 'fat', 'favor', 'fear', 'feel', 'feet', 'fell', 'felt', 'few', 'fill', 'find', 'fit', 'fly', 'follow', 'for', 'forever', 'forget', 'from', 'front', 'gave', 'get', 'gives', 'goes', 'gone', 'good', 'got', 'gray', 'great', 'green', 'grew', 'grow', 'guess', 'had', 'half', 'hang', 'happen', 'has', 'hat', 'have', 'he', 'hear', 'heard', 'held', 'hello', 'help', 'her', 'here', 'hers', 'high', 'hill', 'him', 'his', 'hit', 'hold', 'hot', 'how', 'however', 'I', 'if', 'ill', 'in', 'indeed', 'instead', 'into', 'iron', 'is', 'it', 'its', 'just', 'keep', 'kept', 'knew', 'know', 'known', 'late', 'least', 'led', 'left', 'lend', 'less', 'let', 'like', 'likely', 'likr', 'lone', 'long', 'look', 'lot', 'make', 'many', 'may', 'me', 'mean', 'met', 'might', 'mile', 'mine', 'moon', 'more', 
'most', 'move', 'much', 'must', 'my', 'near', 'nearly', 'necessary', 'neither', 'never', 'next', 'no', 'none', 'nor', 'not', 'note', 'nothing', 'now', 'number', 'of', 'off', 'often', 'oh', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'out', 'please', 'prepare', 'probable', 'pull', 'pure', 'push', 'put', 'raise', 'ran', 'rather', 'reach', 'realize', 'reply', 'require', 'rest', 'run', 'said', 'same', 'sat', 'saw', 'say', 'see', 'seem', 'seen', 'self', 'sell', 'sent', 'separate', 'set', 'shall', 'she', 'should', 'side', 'sign', 'since', 'so', 'sold', 'some', 'soon', 'sorry', 'stay', 'step', 'stick', 'still', 'stood', 'such', 'sudden', 'suppose', 'take', 'taken', 'talk', 'tall', 'tell', 'ten', 'than', 'thank', 'that', 'the', 'their', 'them', 'then', 'there', 'therefore', 'these', 'they', 'this', 'those', 'though', 'through', 'till', 'to', 'today', 'told', 'tomorrow', 'too', 'took', 'tore', 'tought', 'toward', 'tried', 'tries', 'trust', 'try', 'turn', 'two', 'under', 'until', 'up', 'upon', 'us', 'use', 'usual', 'various', 'verb', 'very', 'visit', 'want', 'was', 'we', 'well', 'went', 'were', 'what', 'when', 'where', 'whether', 'which', 'while', 'white', 'who', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without', 'would', 'yes', 'yet', 'you', 'young', 'your', 'br', 'img', 'p','lt', 'gt', 'quot', 'copy');
// keywords() discards words whose byte length is less than or equal to this value.
public static $min_word_length = 4;
/**
 * Return the cleaned text cut to at most $length bytes on a word boundary,
 * with no ellipsis appended (the end marker passed to limit_chars() is '').
 *
 * @param string $text   Source text/HTML.
 * @param int    $length Maximum length handed to limit_chars().
 * @return string
 */
public static function text($text, $length = 160)
{
return self::limit_chars(self::clean($text), $length,'',TRUE);
}
/**
 * Return the most frequent words of the cleaned text, space separated.
 *
 * Words are extracted with str_word_count(); short and banned words are
 * dropped, the remainder is ordered by descending frequency and the first
 * $max_keys entries are kept.
 *
 * @param string $text     Source text/HTML.
 * @param int    $max_keys Maximum number of keywords to return.
 * @return string
 */
public static function keywords($text, $max_keys = 3)
{
// The included file is expected to define $diacritics, used below as the extra
// character list for str_word_count() — presumably accented letters; confirm in res/diacritics.php.
include (dirname(__FILE__) . "/res/diacritics.php");
$wordcount = array_count_values(str_word_count(self::clean($text), 1, $diacritics));
foreach ($wordcount as $key => $value)
{
if ( (strlen($key)<= self::$min_word_length) OR in_array($key, self::$banned_words))
unset($wordcount[$key]);
}
// cmp() orders larger counts first.
uasort($wordcount,[self::class, 'cmp']);
$wordcount = array_slice($wordcount,0, $max_keys);
return implode(' ', array_keys($wordcount));
}
/**
 * Decode HTML entities, strip tags, collapse runs of whitespace to one space,
 * replace line-break sequences and '+' with commas, and trim the result.
 *
 * @param string $text
 * @return string
 */
private static function clean($text)
{
$text = html_entity_decode($text,ENT_QUOTES,self::$charset);
$text = strip_tags($text);
$text1 = preg_replace('/\s\s+/', ' ', $text);
// preg_replace() returns null on failure; keep the previous value in that case.
if($text1 !== null)
{
$text = $text1;
}
// NOTE(review): the first two search strings are raw line-break characters embedded in
// the source; their exact bytes are not visible here — presumably CR/LF variants, confirm.
$text = str_replace (array('
', '
', '+'), ',', $text);
return trim($text);
}
/**
 * Comparison callback producing descending order.
 *
 * @return int 0 when equal, 1 when $a is smaller than $b, -1 otherwise.
 */
private static function cmp($a, $b)
{
if ($a == $b) return 0;
return ($a < $b) ? 1 : -1;
}
/**
 * Shorten a string to $limit and append $end_char when something was cut.
 *
 * @param string      $str            Input string.
 * @param int         $limit          Maximum length (cast to int).
 * @param string|null $end_char       Marker appended after truncation; null selects '…'.
 * @param bool        $preserve_words When not FALSE, cut at whitespace instead of mid-word.
 * @return string
 */
private static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
{
$end_char = ($end_char === NULL) ? '…' : $end_char;
$limit = (int) $limit;
// Blank strings and strings already within the limit (measured in bytes) pass through unchanged.
if (trim($str) === '' OR strlen($str) <= $limit)
return $str;
if ($limit <= 0)
return $end_char;
if ($preserve_words === FALSE)
return rtrim(substr($str, 0, $limit)).$end_char;
// Take up to $limit characters followed by a whitespace character; no such prefix means
// only the end marker is returned.
if ( ! preg_match('/^.{0,'.$limit.'}\s/us', $str, $matches))
return $end_char;
return rtrim($matches[0]).((strlen($matches[0]) === strlen($str)) ? '' : $end_char);
}
}
/**
 * Rewrite a title and body through the SpinnerChief paraphraser API.
 *
 * Title and content are joined with a numeric separator, sent to the API (in
 * 20000-byte slices when the text has more than 5000 words), split again on
 * the separator and finally passed through Crawlomatic_Spintax.
 *
 * Requires the 'best_user' and 'best_password' settings; the latter is sent
 * as the API key.
 *
 * @param string $title   Post title.
 * @param string $content Post body.
 * @return array|false Two-element array (title, content), or FALSE when
 *                     credentials are missing or a request fails.
 */
function crawlomatic_spinnerchief_spin_text($title, $content)
{
    $titleSeparator = '[19459000]';
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['best_user']) || $crawlomatic_Main_Settings['best_user'] == '' || !isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '') {
        crawlomatic_log_to_file('Please insert a valid "SpinnerChief" user email and password.');
        return FALSE;
    }
    $pss = $crawlomatic_Main_Settings['best_password'];
    $html = stripslashes($title). ' ' . $titleSeparator . ' ' . stripslashes($content);

    // Sends one piece of text to the API. Returns the spun text (or the raw response when it
    // is not the expected JSON) and null when the request could not be performed.
    $spin_piece = function ($piece) use ($pss) {
        $ch = curl_init();
        if ($ch === false) {
            return null;
        }
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 600);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        // Intended to drop characters that made the API time out.
        // NOTE(review): as written this class appears to match only '-'; confirm the intended range.
        $filtered = preg_replace('/[--]/', '', $piece);
        if ($filtered !== null) {
            $piece = $filtered;
        }
        // Every form field must be URL-encoded; an unencoded key containing '&', '+' or '='
        // would corrupt the request body.
        $post_me = 'dev_key=api2409357d02fa474d8&api_key=' . urlencode($pss) . '&text=' . urlencode($piece);
        curl_setopt($ch, CURLOPT_URL, "https://www.spinnerchief.com/api/paraphraser");
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_me);
        $response = curl_exec($ch);
        if ($response === FALSE) {
            $cer = 'Curl error: ' . curl_error($ch);
            crawlomatic_log_to_file('"SpinnerChief" failed to exec curl after auth. ' . $cer);
            curl_close($ch);
            return null;
        }
        curl_close($ch);
        $json_res = json_decode($response);
        if (is_object($json_res) && isset($json_res->text)) {
            return $json_res->text;
        }
        return $response;
    };

    if (str_word_count($html) > 5000) {
        $result = '';
        while ($html != '') {
            $slice = substr($html, 0, 20000);
            $slice = rtrim($slice, '(*');
            $slice = ltrim($slice, ')*');
            $html = substr($html, 20000);
            $spun = $spin_piece($slice);
            if ($spun === null) {
                return FALSE;
            }
            $result .= $spun;
        }
    } else {
        $result = $spin_piece($html);
        if ($result === null) {
            return FALSE;
        }
    }

    // Repair separators whose opening bracket came back as ']' (e.g. "]19459000]").
    $repaired = preg_replace('#\](\d+\])#', '[$1', $result);
    if ($repaired !== null) {
        $result = $repaired;
    }
    $result = explode($titleSeparator, $result);
    $spintax = new Crawlomatic_Spintax();
    if (count($result) < 2) {
        // Separator lost: treat everything as content and keep the original title.
        $result[1] = $spintax->process(trim($result[0]));
        $result[0] = $title;
    } else {
        $result[0] = $spintax->process(trim($result[0]));
        $result[1] = $spintax->process(trim($result[1]));
    }
    return $result;
}
/**
 * Download a URL with cURL, falling back to file_get_contents().
 *
 * Honours the plugin's request delay (the 'request_delay' setting, overridden
 * by $request_delay; either a number or "min,max"), the configured proxy list
 * and, when open_basedir is set, follows redirects manually.
 *
 * @param string       $url           Address to fetch.
 * @param string       $headers       Extra request headers, one per line.
 * @param string       $referrer      Referer value; 'self' reuses $url, '' sends none.
 * @param string|false $user_agent    User-Agent to send, or false for the default.
 * @param string       $request_delay Per-call delay override.
 * @return string|false Response body, or false on failure / non-200 status.
 */
function crawlomatic_file_get_contents_advanced($url, $headers = '', $referrer = 'self', $user_agent = false, $request_delay = '')
{
    $content = false;
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);

    // Resolve the delay: the global setting first, then the per-call value which wins when valid.
    $delay = '';
    $delay_inputs = array();
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '') {
        $delay_inputs[] = $crawlomatic_Main_Settings['request_delay'];
    }
    if ($request_delay != '') {
        $delay_inputs[] = $request_delay;
    }
    foreach ($delay_inputs as $raw_delay) {
        if (stristr($raw_delay, ',') !== false) {
            $tempo = explode(',', $raw_delay);
            if (isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0]))) {
                $delay = rand(intval(trim($tempo[0])), intval(trim($tempo[1])));
            }
        } elseif (is_numeric(trim($raw_delay))) {
            $delay = intval(trim($raw_delay));
        }
    }

    if ($delay != '' && is_numeric($delay)) {
        // Bypass the object cache so the timestamp written by other requests is seen.
        if (isset($GLOBALS['wp_object_cache']) && is_object($GLOBALS['wp_object_cache'])) {
            $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        }
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false) {
            // Remaining wait in microseconds ($delay is expressed in milliseconds).
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0) {
                if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                    crawlomatic_log_to_file('Delay between requests set(1), waiting ' . ($sleep_time/1000) . ' ms');
                }
                if ($sleep_time < 21600000) {
                    usleep($sleep_time);
                }
            }
        }
    }

    if (parse_url($url, PHP_URL_SCHEME) != '' && function_exists('curl_init')) {
        $max_redirects = 10;
        $ch = curl_init();
        if ($ch !== false) {
            curl_setopt($ch, CURLOPT_URL, $url);
            if (strtolower($referrer) == 'self') {
                curl_setopt($ch, CURLOPT_REFERER, $url);
            } elseif (strlen($referrer)) {
                curl_setopt($ch, CURLOPT_REFERER, $referrer);
            }
            if ($user_agent) {
                curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
            }
            curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Certificate verification is switched off, as in the original implementation.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            $headers = trim($headers);
            if (strlen($headers)) {
                $headers_array = explode(PHP_EOL, $headers);
                curl_setopt($ch, CURLOPT_HTTPHEADER, $headers_array);
            }
            if (isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled') {
                // Pick one proxy at random; credentials are matched by list position.
                $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
                $randomness = array_rand($prx);
                curl_setopt($ch, CURLOPT_PROXY, trim($prx[$randomness]));
                if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '') {
                    $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
                    if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '') {
                        curl_setopt($ch, CURLOPT_PROXYUSERPWD, trim($prx_auth[$randomness]));
                    }
                }
            }
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            if (ini_get('open_basedir') == '') {
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                curl_setopt($ch, CURLOPT_MAXREDIRS, $max_redirects);
            } else {
                // Redirects are resolved by hand with header-only requests in this configuration.
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
                $rch = curl_copy_handle($ch);
                curl_setopt($rch, CURLOPT_HEADER, true);
                curl_setopt($rch, CURLOPT_NOBODY, true);
                curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
                curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($rch, CURLOPT_SSL_VERIFYPEER, false);
                do {
                    curl_setopt($rch, CURLOPT_URL, $url);
                    curl_setopt($rch, CURLOPT_REFERER, $url);
                    $header = curl_exec($rch);
                    $code = 0;
                    if (!curl_errno($rch) && is_string($header)) {
                        $status = curl_getinfo($rch, CURLINFO_HTTP_CODE);
                        // Header names are case-insensitive (HTTP/2 sends them in lower case).
                        if (in_array($status, array(301, 302, 303, 307, 308)) && preg_match('/^Location:[ \t]*(.*?)[ \t\r]*$/mi', $header, $matches)) {
                            $location = trim($matches[1]);
                            if ($location != '') {
                                if (substr($location, 0, 1) == '/') {
                                    // Resolve scheme-relative and root-relative targets against the
                                    // URL that issued the redirect, not by appending to its full path.
                                    $parts = parse_url($url);
                                    if (isset($parts['scheme']) && isset($parts['host'])) {
                                        if (substr($location, 0, 2) == '//') {
                                            $location = $parts['scheme'] . ':' . $location;
                                        } else {
                                            $origin = $parts['scheme'] . '://' . $parts['host'];
                                            if (isset($parts['port'])) {
                                                $origin .= ':' . $parts['port'];
                                            }
                                            $location = $origin . $location;
                                        }
                                    }
                                }
                                $url = $location;
                                $code = $status;
                            }
                        }
                    }
                } while ($code && --$max_redirects);
                curl_close($rch);
                curl_setopt($ch, CURLOPT_URL, $url);
                curl_setopt($ch, CURLOPT_REFERER, $url);
            }
            curl_setopt($ch, CURLOPT_HEADER, false);
            $content = curl_exec($ch);
            $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            if ($code != 200) {
                $content = false;
            }
            curl_close($ch);
        }
    }

    if (!isset($content) || $content === false) {
        // Use a request-local context with GET. The previous code installed a process-wide
        // default context with method HEAD, which returned no body here and leaked into
        // every later stream request.
        $http_options = array('method' => 'GET', 'timeout' => 10);
        if ($user_agent) {
            $http_options['user_agent'] = $user_agent;
        }
        $fallback_context = stream_context_create(array(
            'ssl' => array('verify_peer' => false, 'verify_peer_name' => false),
            'http' => $http_options,
        ));
        $content = file_get_contents($url, false, $fallback_context);
    }
    if ($delay != '' && is_numeric($delay)) {
        update_option('crawlomatic_last_time', time());
    }
    return $content;
}
/**
 * Pick a random image URL from a Google Images search for the given keyword.
 *
 * Requests the Google Images result page (two stock-photo sites are excluded through
 * "-site:" operators and the "tbs=il:cl" filter is applied - presumably the licence
 * filter, confirm against Google's current parameters), collects every
 * ["<url>",<number>,<number>] tuple whose URL ends in a known image extension and
 * returns one of them at random, provided it satisfies the minimum dimensions.
 *
 * @param string     $keyword       Search phrase; URL-encoded before it is sent.
 * @param int|string $min_width     Minimum accepted width; 0 or a non-numeric value disables the check.
 * @param int|string $min_height    Minimum accepted height; 0 or a non-numeric value disables the check.
 * @param string     $request_delay Passed through unchanged to crawlomatic_file_get_contents_advanced().
 * @return string The URL of a matching image, or '' when the page could not be fetched or nothing qualifies.
 */
function crawlomatic_get_random_image_google($keyword, $min_width = 0, $min_height = 0, $request_delay = '')
{
    $gimageurl = 'https://www.google.com/search?q=' . urlencode($keyword . ' -site:depositphotos.com -site:123rf.com') . '&tbm=isch&tbs=il:cl&sa=X';
    $res = crawlomatic_file_get_contents_advanced($gimageurl, '', 'self', 'Mozilla/5.0 (Windows NT 10.0;WOW64;rv:97.0) Gecko/20100101 Firefox/97.0/3871tuT2p1u-81', $request_delay);
    // The fetch helper can hand back a non-string (e.g. false) on failure; nothing to parse then.
    if (!is_string($res) || $res === '') {
        return '';
    }
    // Non-numeric limits (for example an empty settings field) mean "no minimum".
    $min_width  = is_numeric($min_width) ? (float) $min_width : 0;
    $min_height = is_numeric($min_height) ? (float) $min_height : 0;
    // The extension alternatives are grouped so that every extension - not only ".jpg" -
    // must be preceded by the URL body. Both numbers are captured here, which removes the
    // need to re-parse each hit with a second expression.
    $items = array();
    $found = preg_match_all('/\["([\w%-\.\/:\?&=]+\.(?:jpe?g|gif|png|bmp|wbmp|webp|webm|xbm))",(\d+),(\d+)\]/i', $res, $items, PREG_SET_ORDER);
    if (!$found) {
        // Covers both "no hits" (0) and a PCRE error (false).
        return '';
    }
    shuffle($items);
    foreach ($items as $item) {
        // Mapping kept from the previous implementation: the second number is compared
        // against the width limit and the first against the height limit.
        if ((int) $item[3] >= $min_width && (int) $item[2] >= $min_height) {
            return $item[1];
        }
    }
    return '';
}
function crawlomatic_run_rule($param, $auto = 1, $ret_content = 0)
{
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
{
if (isset($crawlomatic_Main_Settings['rule_timeout']) && $crawlomatic_Main_Settings['rule_timeout'] != '') {
$timeout = intval($crawlomatic_Main_Settings['rule_timeout']);
} else {
$timeout = 3600;
}
ini_set('memory_limit', '-1');
ini_set('default_socket_timeout', $timeout);
ini_set('safe_mode', 'Off');
ini_set('max_execution_time', $timeout);
ini_set('ignore_user_abort', 1);
ini_set('user_agent', crawlomatic_get_random_user_agent());
if(function_exists('ignore_user_abort'))
{
ignore_user_abort(true);
}
if(function_exists('set_time_limit'))
{
set_time_limit($timeout);
}
}
$draft_me = false;
$posts_inserted = 0;
$auto_generate_comments = '0';
if (isset($crawlomatic_Main_Settings['crawlomatic_enabled']) && $crawlomatic_Main_Settings['crawlomatic_enabled'] == 'on') {
try {
$items = array();
$item_img = '';
$cont = 0;
$found = 0;
$ids = '';
$schedule = '';
$enable_comments = '1';
$enable_pingback = '1';
$author_link = '';
$author_email = '';
$active = '0';
$last_run = '';
$ruleType = 'week';
$first = false;
$others = array();
$post_title = '';
$post_content = '';
$list_item = '';
$default_category = '';
$extra_categories = '';
$allow_html_tags = '';
$strip_links = '';
$only_text = '';
$type = '';
$expre = '';
$get_css = '';
$posted_items = array();
$post_status = 'publish';
$post_type = 'post';
$accept_comments = 'closed';
$post_user_name = 1;
$item_create_tag = '';
$can_create_tag = '0';
$strip_images = '0';
$item_tags = '';
$max = 50;
$auto_categories = '0';
$featured_image = '0';
$image_url = '';
$banned_words = '';
$required_words = '';
$strip_by_id = '';
$encoding = 'NO_CHANGE';
$strip_by_class = '';
$strip_by_xpath = '';
$strip_html_by_xpath = '';
$local_storage = '';
$skip_no_match = '';
$regex_image = '';
$rule_description= '';
$post_format = 'post-format-standard';
$post_array = array();
$limit_word_count = '';
$translate = 'disabled';
$second_translate = '';
$run_raw_html = '';
$no_auto_update = '';
$remove_default = '0';
$rule_unique_id = '';
$read_more = '';
$skip_og = '0';
$remove_cats = '';
$auto_delete = '';
$content_percent = '';
$skip_post_content = '0';
$custom_fields = '';
$source_lang = 'en';
$strip_by_regex = '';
$replace_regex = '';
$strip_by_regex_title = '';
$replace_regex_title = '';
$no_external = '1';
$title_expre = '';
$title_type = '';
$image_type = '';
$image_expre = '';
$lazy_tag = '';
$no_match_query = '';
$date_type = '';
$date_expre = '';
$cat_type = '';
$cat_expre = '';
$max_depth = '2';
$custom_cookies = '';
$custom_user_agent = '';
$only_class = '';
$only_id = '';
$no_source = '0';
$reverse_crawl = '';
$seed_type = '';
$seed_expre = '';
$crawled_type = '';
$crawled_expre = '';
$paged_crawl_str = '';
$paged_crawl_type= 'class';
$max_paged_depth = 5;
$seed_pag_type = '';
$seed_pag_expre = '';
$continue_search = '';
$author_expre = '';
$post_fields = '';
$author_type = '';
$price_type = '';
$price_expre = '';
$gallery_type = '';
$gallery_expre = '';
$gallery_regex = '';
$replace_gallery_regex = '';
$scrape_variations = '';
$parent_category_id = '';
$cat_sep = '';
$date_index = '';
$keep_source = '';
$use_proxy = '';
$use_phantom = '';
$wpml_lang = '';
$custom_crawling_expre = '';
$strip_by_tag = '';
$crawl_exclude = '';
$crawl_include = '';
$featured_replacer= '';
$copy_regex = '';
$variants_label = '';
$parent_id = '';
$crawl_title_exclude = '';
$phantom_wait = '';
$custom_tax = '';
$user_pass = '';
$royalty_free = '';
$max_results = '';
$max_crawl = '';
$check_only_content = '';
$append_urls = '';
$scripter = '';
$strip_comma = '';
$update_existing = '';
$copy_images = '';
$replace_words = '';
$attach_screen = '';
$tag_type = '';
$tag_expre = '';
$download_type = '';
$download_expre = '';
$tag_sep = '';
$excerpt_type = '';
$excerpt_expre = '';
$request_delay = '';
$no_spin = '';
$check_words = '';
$auto_captcha = '';
$enable_adblock = '';
$clickelement = '';
$regular_price_type = '';
$regular_price_expre = '';
$strip_comma_regular = '';
$copy_types = '';
$limit_content_word_count = '';
$limit_title_word_count = '';
$require_one = '';
$skip_no_image = '';
$GLOBALS['wp_object_cache']->delete('crawlomatic_rules_list', 'options');
if (!get_option('crawlomatic_rules_list')) {
$rules = array();
} else {
$rules = get_option('crawlomatic_rules_list');
}
if (!empty($rules)) {
foreach ($rules as $request => $bundle[]) {
if ($cont == $param) {
$bundle_values = array_values($bundle);
$myValues = $bundle_values[$cont];
$array_my_values = array_values($myValues);for($iji=0;$iji<count($array_my_values);++$iji){if(is_string($array_my_values[$iji])){$array_my_values[$iji]=stripslashes($array_my_values[$iji]);}}
$ids = isset($array_my_values[0]) ? $array_my_values[0] : '';
$schedule = isset($array_my_values[1]) ? $array_my_values[1] : '';
$active = isset($array_my_values[2]) ? $array_my_values[2] : '';
$last_run = isset($array_my_values[3]) ? $array_my_values[3] : '';
$max = isset($array_my_values[4]) ? $array_my_values[4] : '';
$post_status = isset($array_my_values[5]) ? $array_my_values[5] : '';
$post_type = isset($array_my_values[6]) ? $array_my_values[6] : '';
$post_user_name = isset($array_my_values[7]) ? $array_my_values[7] : '';
$item_create_tag = isset($array_my_values[8]) ? $array_my_values[8] : '';
$default_category = isset($array_my_values[9]) ? $array_my_values[9] : '';
$auto_categories = isset($array_my_values[10]) ? $array_my_values[10] : '';
$can_create_tag = isset($array_my_values[11]) ? $array_my_values[11] : '';
$enable_comments = isset($array_my_values[12]) ? $array_my_values[12] : '';
$featured_image = isset($array_my_values[13]) ? $array_my_values[13] : '';
$image_url = isset($array_my_values[14]) ? $array_my_values[14] : '';
$post_title = isset($array_my_values[15]) ? htmlspecialchars_decode($array_my_values[15]) : '';
$post_content = isset($array_my_values[16]) ? htmlspecialchars_decode($array_my_values[16]) : '';
$enable_pingback = isset($array_my_values[17]) ? $array_my_values[17] : '';
$post_format = isset($array_my_values[18]) ? $array_my_values[18] : '';
$only_text = isset($array_my_values[19]) ? $array_my_values[19] : '';
$type = isset($array_my_values[20]) ? $array_my_values[20] : '';
$expre = isset($array_my_values[21]) ? $array_my_values[21] : '';
$get_css = isset($array_my_values[22]) ? $array_my_values[22] : '';
$banned_words = isset($array_my_values[23]) ? $array_my_values[23] : '';
$required_words = isset($array_my_values[24]) ? $array_my_values[24] : '';
$strip_by_id = isset($array_my_values[25]) ? $array_my_values[25] : '';
$strip_by_class = isset($array_my_values[26]) ? $array_my_values[26] : '';
$encoding = isset($array_my_values[27]) ? $array_my_values[27] : 'NO_CHANGE';
$limit_word_count = isset($array_my_values[28]) ? $array_my_values[28] : '';
$translate = isset($array_my_values[29]) ? $array_my_values[29] : 'disabled';
$seed_pag_type = isset($array_my_values[30]) ? $array_my_values[30] : '';
$strip_images = isset($array_my_values[31]) ? $array_my_values[31] : '';
$remove_default = isset($array_my_values[32]) ? $array_my_values[32] : '';
$rule_unique_id = isset($array_my_values[33]) ? $array_my_values[33] : '';
$read_more = isset($array_my_values[34]) ? $array_my_values[34] : '';
$skip_og = isset($array_my_values[35]) ? $array_my_values[35] : '';
$remove_cats = isset($array_my_values[36]) ? $array_my_values[36] : '';
$auto_delete = isset($array_my_values[37]) ? $array_my_values[37] : '';
$skip_post_content= isset($array_my_values[38]) ? $array_my_values[38] : '';
$content_percent = isset($array_my_values[39]) ? $array_my_values[39] : '';
$custom_fields = isset($array_my_values[40]) ? $array_my_values[40] : '';
$source_lang = isset($array_my_values[41]) ? $array_my_values[41] : '';
$strip_by_regex = isset($array_my_values[42]) ? $array_my_values[42] : '';
$replace_regex = isset($array_my_values[43]) ? $array_my_values[43] : '';
$no_external = isset($array_my_values[44]) ? $array_my_values[44] : '';
$title_type = isset($array_my_values[45]) ? $array_my_values[45] : '';
$title_expre = isset($array_my_values[46]) ? $array_my_values[46] : '';
$image_type = isset($array_my_values[47]) ? $array_my_values[47] : '';
$image_expre = isset($array_my_values[48]) ? $array_my_values[48] : '';
$date_type = isset($array_my_values[49]) ? $array_my_values[49] : '';
$date_expre = isset($array_my_values[50]) ? $array_my_values[50] : '';
$cat_type = isset($array_my_values[51]) ? $array_my_values[51] : '';
$cat_expre = isset($array_my_values[52]) ? $array_my_values[52] : '';
$max_depth = isset($array_my_values[53]) ? $array_my_values[53] : '';
$custom_cookies = isset($array_my_values[54]) ? $array_my_values[54] : '';
$only_class = isset($array_my_values[55]) ? $array_my_values[55] : '';
$only_id = isset($array_my_values[56]) ? $array_my_values[56] : '';
$no_source = isset($array_my_values[57]) ? $array_my_values[57] : '';
$seed_type = isset($array_my_values[58]) ? $array_my_values[58] : '';
$seed_expre = isset($array_my_values[59]) ? $array_my_values[59] : '';
$crawled_type = isset($array_my_values[60]) ? $array_my_values[60] : '';
$crawled_expre = isset($array_my_values[61]) ? $array_my_values[61] : '';
$paged_crawl_str = isset($array_my_values[62]) ? $array_my_values[62] : '';
$paged_crawl_type = isset($array_my_values[63]) ? $array_my_values[63] : '';
$max_paged_depth = isset($array_my_values[64]) ? $array_my_values[64] : '';
$custom_user_agent= isset($array_my_values[65]) ? $array_my_values[65] : '';
$seed_pag_expre = isset($array_my_values[66]) ? $array_my_values[66] : '';
$price_type = isset($array_my_values[67]) ? $array_my_values[67] : '';
$price_expre = isset($array_my_values[68]) ? $array_my_values[68] : '';
$parent_category_id= isset($array_my_values[69]) ? $array_my_values[69] : '';
$cat_sep = isset($array_my_values[70]) ? $array_my_values[70] : '';
$date_index = isset($array_my_values[71]) ? $array_my_values[71] : '';
$keep_source = isset($array_my_values[72]) ? $array_my_values[72] : '';
$use_proxy = isset($array_my_values[73]) ? $array_my_values[73] : '';
$use_phantom = isset($array_my_values[74]) ? $array_my_values[74] : '';
$custom_crawling_expre = isset($array_my_values[75]) ? $array_my_values[75] : '';
$custom_tax = isset($array_my_values[76]) ? $array_my_values[76] : '';
$user_pass = isset($array_my_values[77]) ? $array_my_values[77] : '';
$strip_by_tag = isset($array_my_values[78]) ? $array_my_values[78] : '';
$crawl_exclude = isset($array_my_values[79]) ? $array_my_values[79] : '';
$royalty_free = isset($array_my_values[80]) ? $array_my_values[80] : '';
$max_results = isset($array_my_values[81]) ? $array_my_values[81] : '';
$strip_comma = isset($array_my_values[82]) ? $array_my_values[82] : '';
$update_existing = isset($array_my_values[83]) ? $array_my_values[83] : '';
$copy_images = isset($array_my_values[84]) ? $array_my_values[84] : '';
$allow_html_tags = isset($array_my_values[85]) ? $array_my_values[85] : '';
$strip_links = isset($array_my_values[86]) ? $array_my_values[86] : '';
$lazy_tag = isset($array_my_values[87]) ? $array_my_values[87] : '';
$reverse_crawl = isset($array_my_values[88]) ? $array_my_values[88] : '';
$replace_words = isset($array_my_values[89]) ? $array_my_values[89] : '';
$attach_screen = isset($array_my_values[90]) ? $array_my_values[90] : '';
$crawl_title_exclude = isset($array_my_values[91]) ? $array_my_values[91] : '';
$strip_by_regex_title = isset($array_my_values[92]) ? $array_my_values[92] : '';
$replace_regex_title = isset($array_my_values[93]) ? $array_my_values[93] : '';
$tag_type = isset($array_my_values[94]) ? $array_my_values[94] : '';
$tag_expre = isset($array_my_values[95]) ? $array_my_values[95] : '';
$tag_sep = isset($array_my_values[96]) ? $array_my_values[96] : '';
$phantom_wait = isset($array_my_values[97]) ? $array_my_values[97] : '';
$strip_by_xpath = isset($array_my_values[98]) ? $array_my_values[98] : '';
$skip_no_match = isset($array_my_values[99]) ? $array_my_values[99] : '';
$continue_search = isset($array_my_values[100]) ? $array_my_values[100] : '';
$author_type = isset($array_my_values[101]) ? $array_my_values[101] : '';
$author_expre = isset($array_my_values[102]) ? $array_my_values[102] : '';
$no_match_query = isset($array_my_values[103]) ? $array_my_values[103] : '';
$post_fields = isset($array_my_values[104]) ? $array_my_values[104] : '';
$limit_content_word_count = isset($array_my_values[105]) ? $array_my_values[105] : '';
$request_delay = isset($array_my_values[106]) ? $array_my_values[106] : '';
$no_spin = isset($array_my_values[107]) ? $array_my_values[107] : '';
$skip_no_image = isset($array_my_values[108]) ? $array_my_values[108] : '';
$limit_title_word_count = isset($array_my_values[109]) ? $array_my_values[109] : '';
$require_one = isset($array_my_values[110]) ? $array_my_values[110] : '';
$max_crawl = isset($array_my_values[111]) ? $array_my_values[111] : '';
$check_only_content = isset($array_my_values[112]) ? $array_my_values[112] : '';
$append_urls = isset($array_my_values[113]) ? $array_my_values[113] : '';
$scripter = isset($array_my_values[114]) ? $array_my_values[114] : '';
$strip_html_by_xpath= isset($array_my_values[115]) ? $array_my_values[115] : '';
$local_storage = isset($array_my_values[116]) ? $array_my_values[116] : '';
$wpml_lang = isset($array_my_values[117]) ? $array_my_values[117] : '';
$download_type = isset($array_my_values[118]) ? $array_my_values[118] : '';
$download_expre = isset($array_my_values[119]) ? $array_my_values[119] : '';
$regex_image = isset($array_my_values[120]) ? $array_my_values[120] : '';
$rule_description = isset($array_my_values[121]) ? $array_my_values[121] : '';
$gallery_type = isset($array_my_values[122]) ? $array_my_values[122] : '';
$gallery_expre = isset($array_my_values[123]) ? $array_my_values[123] : '';
$gallery_regex = isset($array_my_values[124]) ? $array_my_values[124] : '';
$replace_gallery_regex= isset($array_my_values[125]) ? $array_my_values[125] : '';
$excerpt_type = isset($array_my_values[126]) ? $array_my_values[126] : '';
$excerpt_expre = isset($array_my_values[127]) ? $array_my_values[127] : '';
$check_words = isset($array_my_values[128]) ? $array_my_values[128] : '';
$auto_captcha = isset($array_my_values[129]) ? $array_my_values[129] : '';
$enable_adblock = isset($array_my_values[130]) ? $array_my_values[130] : '';
$copy_types = isset($array_my_values[131]) ? $array_my_values[131] : '';
$scrape_variations= isset($array_my_values[132]) ? $array_my_values[132] : '';
$second_translate = isset($array_my_values[133]) ? $array_my_values[133] : 'disabled';
$run_raw_html = isset($array_my_values[134]) ? $array_my_values[134] : '';
$no_auto_update = isset($array_my_values[135]) ? $array_my_values[135] : '';
$clickelement = isset($array_my_values[136]) ? $array_my_values[136] : '';
$regular_price_type = isset($array_my_values[137]) ? $array_my_values[137] : '';
$regular_price_expre = isset($array_my_values[138]) ? $array_my_values[138] : '';
$strip_comma_regular = isset($array_my_values[139]) ? $array_my_values[139] : '';
$crawl_include = isset($array_my_values[140]) ? $array_my_values[140] : '';
$featured_replacer= isset($array_my_values[141]) ? $array_my_values[141] : '';
$copy_regex = isset($array_my_values[142]) ? $array_my_values[142] : '';
$variants_label = isset($array_my_values[143]) ? $array_my_values[143] : 'Variants';
$parent_id = isset($array_my_values[144]) ? $array_my_values[144] : '';
$found = 1;
break;
}
$cont = $cont + 1;
}
} else {
crawlomatic_log_to_file('No rules found for crawlomatic_rules_list!');
return 'fail';
}
if($custom_user_agent == 'random' || $custom_user_agent == '')
{
$custom_user_agent = crawlomatic_get_random_user_agent();
}
if($ret_content == 0)
{
$f = fopen(get_temp_dir() . 'crawlomatic_' . $param, 'w');
if($f !== false)
{
$flock_disabled = explode(',', ini_get('disable_functions'));
if(!in_array('flock', $flock_disabled))
{
if (!flock($f, LOCK_EX | LOCK_NB))
{
$GLOBALS['wp_object_cache']->delete('crawlomatic_running_list', 'options');
$running = get_option('crawlomatic_running_list', array());
if (!empty($running))
{
if (in_array($rule_unique_id, $running))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('This rule is already running!');
}
return 'nochange';
}
}
}
}
}
}
if($use_phantom == '1')
{
$phchecked = get_transient('crawlomatic_phantom_check');
if($phchecked === false)
{
$phantom = crawlomatic_testPhantom();
if($phantom === 0)
{
crawlomatic_log_to_file('PhantomJS not found! Please install it on your server or configure the path to it from plugin\'s \'Main Settings\'.');
return 'fail';
}
elseif($phantom === -1)
{
crawlomatic_log_to_file('shell' . '_exec is not enabled on your server. Please enable it and retry using this feature of the plugin.');
return 'fail';
}
elseif($phantom === -2)
{
crawlomatic_log_to_file('shell' . '_exec is not allowed to run on your server (in disable_functions list in php.ini). Please enable it and retry using this feature of the plugin.');
return 'fail';
}
else
{
set_transient('crawlomatic_phantom_check', '1', 2592000);
}
}
}
elseif($use_phantom == '2')
{
$phchecked = get_transient('crawlomatic_puppeteer_check');
if($phchecked === false)
{
$phantom = crawlomatic_testPuppeteer();
if($phantom === 0)
{
crawlomatic_log_to_file('Puppeteer not found! Please install it on your server globally.');
return 'fail';
}
elseif($phantom === -1)
{
crawlomatic_log_to_file('shell' . '_exec is not enabled on your server. Please enable it and retry using this feature of the plugin.');
return 'fail';
}
elseif($phantom === -2)
{
crawlomatic_log_to_file('shell' . '_exec is not allowed to run on your server (in disable_functions list in php.ini). Please enable it and retry using this feature of the plugin.');
return 'fail';
}
else
{
set_transient('crawlomatic_puppeteer_check', '1', 2592000);
}
}
}
elseif($use_phantom == '3')
{
$phchecked = get_transient('crawlomatic_tor_check');
if($phchecked === false)
{
$phantom = crawlomatic_testTor();
if($phantom === 0)
{
crawlomatic_log_to_file('Puppeteer not found! Please install it on your server globally (also Tor).');
return 'fail';
}
elseif($phantom === -1)
{
crawlomatic_log_to_file('shell' . '_exec is not enabled on your server. Please enable it and retry using this feature of the plugin.');
return 'fail';
}
elseif($phantom === -2)
{
crawlomatic_log_to_file('shell' . '_exec is not allowed to run on your server (in disable_functions list in php.ini). Please enable it and retry using this feature of the plugin.');
return 'fail';
}
else
{
set_transient('crawlomatic_tor_check', '1', 2592000);
}
}
}
if(!is_numeric($max_depth))
{
$max_depth = 2;
}
if($source_lang == 'disabled')
{
$source_lang = 'auto';
}
$woo_active = false;
if(!function_exists('is_plugin_active'))
{
include_once( ABSPATH . 'wp-admin/includes/plugin.php' );
}
if (is_plugin_active('woocommerce/woocommerce.php')) {
$woo_active = true;
}
if($rule_unique_id == '')
{
$rule_unique_id = $param;
}
if ($found == 0) {
crawlomatic_log_to_file($param . ' not found in crawlomatic_rules_list!');
return 'fail';
} else {
if($ret_content == 0)
{
$GLOBALS['wp_object_cache']->delete('crawlomatic_rules_list', 'options');
$rules = get_option('crawlomatic_rules_list');
$rules[$param][3] = crawlomatic_get_date_now();
update_option('crawlomatic_rules_list', $rules, false);
}
}
if($ret_content == 0)
{
$GLOBALS['wp_object_cache']->delete('crawlomatic_running_list', 'options');
$running = get_option('crawlomatic_running_list', array());
if (!empty($running)) {
if (in_array($rule_unique_id, $running)) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Only one instance of this rule is allowed. Rule is already running!');
}
return 'nochange';
}
}
$key = time();
if(!isset($running[$key]))
{
$running[$key] = $rule_unique_id;
}
else
{
$running[$key + 1] = $rule_unique_id;
}
update_option('crawlomatic_running_list', $running, false);
register_shutdown_function('crawlomatic_clear_flag_at_shutdown', $rule_unique_id, false);
}
if ($enable_comments == '1') {
$accept_comments = 'open';
}
if($max_paged_depth === '')
{
$max_paged_depth = 3;
}
if (isset($crawlomatic_Main_Settings['do_not_check_duplicates']) && $crawlomatic_Main_Settings['do_not_check_duplicates'] == 'on') {
$no_dupl_crawl = false;
}
else
{
if (isset($crawlomatic_Main_Settings['do_not_crawl_duplicates']) && $crawlomatic_Main_Settings['do_not_crawl_duplicates'] == 'on') {
$no_dupl_crawl = true;
}
else
{
$no_dupl_crawl = false;
}
if (!has_filter('crawlomatic_filter_dup_check'))
{
if($ret_content == 0)
{
if (!isset($crawlomatic_Main_Settings['title_duplicates']) || $crawlomatic_Main_Settings['title_duplicates'] != 'on')
{
$postsPerPage = 50000;
$paged = 0;
wp_suspend_cache_addition(true);
$post_stati = get_post_stati();
foreach ($post_stati as $key => $val) {
if ($val == 'auto-draft') {
unset($post_stati[$key]);
}
if ($val == 'inherit') {
unset($post_stati[$key]);
}
if ($val == 'request-pending') {
unset($post_stati[$key]);
}
if ($val == 'request-confirmed') {
unset($post_stati[$key]);
}
if ($val == 'request-failed') {
unset($post_stati[$key]);
}
if ($val == 'request-completed') {
unset($post_stati[$key]);
}
}
do
{
$postOffset = $paged * $postsPerPage;
$query = array(
'post_status' => $post_stati,
'post_type' => array(
'any'
),
'numberposts' => $postsPerPage,
'fields' => 'ids',
'meta_key' => 'crawlomatic_post_url',
'offset' => $postOffset
);
$post_list = get_posts($query);
foreach ($post_list as $post) {
$orig_url = get_post_meta($post, 'crawlomatic_post_orig_url', true);
if($orig_url == '')
{
$orig_url = get_post_meta($post, 'crawlomatic_post_url', true);
}
$posted_items[$orig_url] = $post;
}
$paged++;
}while(!empty($post_list));
wp_suspend_cache_addition(false);
unset($post_list);
}
else
{
$post_stati = get_post_stati();
foreach ($post_stati as $key => $val) {
if ($val == 'auto-draft') {
unset($post_stati[$key]);
}
if ($val == 'inherit') {
unset($post_stati[$key]);
}
if ($val == 'request-pending') {
unset($post_stati[$key]);
}
if ($val == 'request-confirmed') {
unset($post_stati[$key]);
}
if ($val == 'request-failed') {
unset($post_stati[$key]);
}
if ($val == 'request-completed') {
unset($post_stati[$key]);
}
}
$postsPerPage = 50000;
$paged = 0;
wp_suspend_cache_addition(true);
do
{
$postOffset = $paged * $postsPerPage;
$query = array(
'post_status' => $post_stati,
'post_type' => array(
'any'
),
'numberposts' => $postsPerPage,
'fields' => 'ids',
'meta_key' => 'crawlomatic_item_title',
'offset' => $postOffset
);
$post_list = get_posts($query);
foreach ($post_list as $post) {
$orig_title = get_post_meta($post, 'crawlomatic_item_title', true);
$posted_items[$orig_title] = $post;
}
$paged++;
}while(!empty($post_list));
wp_suspend_cache_addition(false);
unset($post_list);
}
}
}
}
if (isset($crawlomatic_Main_Settings['update_existing']) && $crawlomatic_Main_Settings['update_existing'] == 'on') {
$update_ex = true;
}
else
{
$update_ex = false;
}
if($update_existing == '1')
{
$update_ex = true;
}
if (isset($crawlomatic_Main_Settings['cat_separator']) && $crawlomatic_Main_Settings['cat_separator'] !== '') {
if($cat_sep == '')
{
$cat_sep = $crawlomatic_Main_Settings['cat_separator'];
}
}
else
{
if($cat_sep == '')
{
$cat_sep = ',';
}
}
if($tag_sep == '')
{
$tag_sep = ',';
}
if($crawl_exclude != '')
{
$crawl_exclude = preg_split('/
|
|
/', $crawl_exclude);
$crawl_exclude = array_map('trim', $crawl_exclude);
}
else
{
$crawl_exclude = array();
}
if($crawl_include != '')
{
$crawl_include = preg_split('/
|
|
/', $crawl_include);
$crawl_include = array_map('trim', $crawl_include);
}
else
{
$crawl_include = array();
}
if($crawl_title_exclude != '')
{
$crawl_title_exclude = preg_split('/
|
|
/', $crawl_title_exclude);
$crawl_title_exclude = array_map('trim', $crawl_title_exclude);
}
else
{
$crawl_title_exclude = array();
}
$ids = crawlomatic_replaceSynergyShortcodes($ids);
if($max_results != '')
{
$maximum_crawl = $max_results;
}
else
{
$maximum_crawl = $max;
}
if($seed_type == 'sitemap')
{
require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/Exceptions/SitemapParserException.php");
require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/Exceptions/TransferException.php");
require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser/UrlParser.php");
require_once (dirname(__FILE__) . "/res/SitemapParser-master/src/SitemapParser.php");
}
$ids_arr = preg_split('/
|
|
/', trim($ids));
if (isset($crawlomatic_Main_Settings['randomize_order']) && $crawlomatic_Main_Settings['randomize_order'] == 'on') {
shuffle($ids_arr);
}
foreach($ids_arr as $id_el)
{
if(count($items) >= $max)
{
break;
}
if(substr($id_el, 0, 2) === "//")
{
$id_el = 'http:' . $id_el;
}
$GLOBALS['wp_object_cache']->delete('crawlomatic_continue_search', 'options');
$skip_posts_temp = get_option('crawlomatic_continue_search', array());
if(crawlomatic_is_sitemap_or_rss($seed_type))
{
$skip_posts_temp[$param] = '';
}
preg_match_all('{%%counter_(\d+)_(\d+)_(\d+)%%}', $id_el, $counter_matches);
if (!empty($counter_matches[3])) {
$run_counter = $counter_matches[1][0];
do
{
$new_ids1 = preg_replace('{%%counter_(\d+)_(\d+)_(\d+)%%}', $run_counter, $id_el);
if($new_ids1 !== null)
{
$new_ids = $new_ids1;
}
else
{
$new_ids = $id_el;
}
$GLOBALS['crawl_done'] = false;
$GLOBALS['seed'] = true;
$items_xtemp = array();
$items_xtemp = crawlomatic_crawl_page($new_ids, $maximum_crawl, $skip_og, $skip_post_content, $no_external, $required_words, $banned_words, $type, $expre, $title_type, $title_expre, $image_type, $image_expre, $date_type, $date_expre, $cat_type, $cat_expre, intval($max_depth), $custom_cookies, $only_class, $only_id, $no_source, $seed_type, $seed_expre, $crawled_type, $crawled_expre, $paged_crawl_str, $paged_crawl_type, $max_paged_depth, $custom_user_agent, $posted_items, $update_ex, $cat_sep, true, $seed_pag_type, $seed_pag_expre, $price_type, $price_expre, true, $use_proxy, $use_phantom, $no_dupl_crawl, $custom_crawling_expre, $user_pass, $crawl_exclude, $crawl_title_exclude, $encoding, $strip_comma, $reverse_crawl, $lazy_tag, $tag_type, $tag_expre, $tag_sep, $phantom_wait, $param, $continue_search, $author_type, $author_expre, $no_match_query, $post_fields, $request_delay, $require_one, $max_crawl, $check_only_content, $scripter, $local_storage, $download_type, $download_expre, $gallery_type, $gallery_expre, $excerpt_type, $excerpt_expre, $check_words, $auto_captcha, $enable_adblock, false, $scrape_variations, $run_raw_html, $strip_by_regex, $replace_regex, $skip_no_match, $clickelement, $regular_price_type, $regular_price_expre, $strip_comma_regular, $crawl_include);
if($items_xtemp === false || !is_array($items_xtemp))
{
crawlomatic_log_to_file('Failed to get source web page (%%counter_' . $counter_matches[1][0] . '_' . $counter_matches[2][0] . '_' . $counter_matches[3][0] . ')! ' . print_r($items_xtemp, true));
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file(count($items_xtemp) . ' items scraped in step ' . $run_counter . ' for URL: ' . $new_ids);
}
$items = array_merge($items, $items_xtemp);
}
$run_counter += $counter_matches[3][0];
}
while(count($items) < $max && $run_counter <= $counter_matches[2][0]);
}
else
{
if($continue_search == '1')
{
if(isset($skip_posts_temp[$param]) && trim($skip_posts_temp[$param]) != '')
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Loading URL from saved data (continue crawling) (rule ID ' . $param . '): ' . $skip_posts_temp[$param] . '!');
}
$id_el = $skip_posts_temp[$param];
}
}
else
{
if(isset($skip_posts_temp[$param]))
{
unset($skip_posts_temp[$param]);
update_option('crawlomatic_continue_search', $skip_posts_temp);
}
}
$GLOBALS['crawl_done'] = false;
$GLOBALS['seed'] = true;
$items_xtemp = array();
$items_xtemp = crawlomatic_crawl_page($id_el, $maximum_crawl, $skip_og, $skip_post_content, $no_external, $required_words, $banned_words, $type, $expre, $title_type, $title_expre, $image_type, $image_expre, $date_type, $date_expre, $cat_type, $cat_expre, intval($max_depth), $custom_cookies, $only_class, $only_id, $no_source, $seed_type, $seed_expre, $crawled_type, $crawled_expre, $paged_crawl_str, $paged_crawl_type, $max_paged_depth, $custom_user_agent, $posted_items, $update_ex, $cat_sep, true, $seed_pag_type, $seed_pag_expre, $price_type, $price_expre, true, $use_proxy, $use_phantom, $no_dupl_crawl, $custom_crawling_expre, $user_pass, $crawl_exclude, $crawl_title_exclude, $encoding, $strip_comma, $reverse_crawl, $lazy_tag, $tag_type, $tag_expre, $tag_sep, $phantom_wait, $param, $continue_search, $author_type, $author_expre, $no_match_query, $post_fields, $request_delay, $require_one, $max_crawl, $check_only_content, $scripter, $local_storage, $download_type, $download_expre, $gallery_type, $gallery_expre, $excerpt_type, $excerpt_expre, $check_words, $auto_captcha, $enable_adblock, false, $scrape_variations, $run_raw_html, $strip_by_regex, $replace_regex, $skip_no_match, $clickelement, $regular_price_type, $regular_price_expre, $strip_comma_regular, $crawl_include);
if($items_xtemp === false || !is_array($items_xtemp))
{
crawlomatic_log_to_file('Failed to get source web page, importing will not run from this URL! ' . $id_el . ' - ' . print_r($items_xtemp, true));
if(count($ids_arr) > 1)
{
continue;
}
else
{
if($continue_search == '1' && isset($skip_posts_temp[$param]))
{
unset($skip_posts_temp[$param]);
update_option('crawlomatic_continue_search', $skip_posts_temp);
}
return 'fail';
}
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file(count($items_xtemp) . ' items scraped for URL: ' . $id_el);
}
$items = array_merge($items, $items_xtemp);
}
}
}
if(count($items) == 0)
{
crawlomatic_log_to_file('All crawled posts are already posted or no content found for your query. Rule ID: ' . esc_html($param) . ': ' . $seed_type . ' -- ' . $seed_expre);
if($continue_search == '1' && isset($skip_posts_temp[$param]))
{
unset($skip_posts_temp[$param]);
update_option('crawlomatic_continue_search', $skip_posts_temp);
}
return 'nochange';
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Total items scraped: ' . count($items));
}
}
$count = 1;
$init_date = time();
$skip_pcount = 0;
$skipped_pcount = 0;
if($ret_content == 1)
{
$item_xcounter = count($items);
$skip_pcount = rand(0, $item_xcounter-1);
}
if(isset($crawlomatic_Main_Settings['attr_text']) && $crawlomatic_Main_Settings['attr_text'] != '')
{
$img_attr = $crawlomatic_Main_Settings['attr_text'];
}
else
{
$img_attr = '';
}
if (isset($crawlomatic_Main_Settings['def_user']) && is_numeric($crawlomatic_Main_Settings['def_user'])) {
$dff_u = $crawlomatic_Main_Settings['def_user'];
}
else
{
$dff_u = '1';
}
$user_name_type = $post_user_name;
for($iloop = 0; $iloop < count($items); ++$iloop)
{
$css_cont = '';
if($ret_content == 1)
{
if($skip_pcount > $skipped_pcount)
{
$skipped_pcount++;
continue;
}
}
$item_price_multi = $items[$iloop]['price'];
if($item_price_multi !== '' && $item_price_multi !== false)
{
if (isset($crawlomatic_Main_Settings['price_multiply']) && $crawlomatic_Main_Settings['price_multiply'] !== '')
{
$item_price_multi = round($item_price_multi * $crawlomatic_Main_Settings['price_multiply'], 2);
}
if (isset($crawlomatic_Main_Settings['price_add']) && $crawlomatic_Main_Settings['price_add'] !== '')
{
$item_price_multi = $item_price_multi + $crawlomatic_Main_Settings['price_add'];
}
if (isset($crawlomatic_Main_Settings['price_end']) && $crawlomatic_Main_Settings['price_end'] !== '')
{
$item_price_multi = floor($item_price_multi) + $crawlomatic_Main_Settings['price_end'];
}
if (isset($crawlomatic_Main_Settings['d_sep']) && $crawlomatic_Main_Settings['d_sep'] != '' && isset($crawlomatic_Main_Settings['t_sep']) && $crawlomatic_Main_Settings['t_sep'] != '')
{
$d_sep = $crawlomatic_Main_Settings['d_sep'];
$t_sep = $crawlomatic_Main_Settings['t_sep'];
$price_t = number_format($item_price_multi, 2, $d_sep, $t_sep);
if(!empty($price_t))
{
$item_price_multi = $price_t;
}
$price_x = number_format($items[$iloop]['price'], 2, $d_sep, $t_sep);
if(!empty($price_x))
{
$items[$iloop]['price'] = $price_x;
}
}
}
else
{
$item_price_multi = '';
}
if(isset($items[$iloop]['regular_price']))
{
$item_regular_price_multi = $items[$iloop]['regular_price'];
}
else
{
$item_regular_price_multi = '';
}
if($item_regular_price_multi !== '' && $item_regular_price_multi !== false && $item_regular_price_multi !== 0)
{
if (isset($crawlomatic_Main_Settings['price_multiply']) && $crawlomatic_Main_Settings['price_multiply'] !== '')
{
$item_regular_price_multi = round($item_regular_price_multi * $crawlomatic_Main_Settings['price_multiply'], 2);
}
if (isset($crawlomatic_Main_Settings['price_add']) && $crawlomatic_Main_Settings['price_add'] !== '')
{
$item_regular_price_multi = $item_regular_price_multi + $crawlomatic_Main_Settings['price_add'];
}
if (isset($crawlomatic_Main_Settings['price_end']) && $crawlomatic_Main_Settings['price_end'] !== '')
{
$item_regular_price_multi = floor($item_regular_price_multi) + $crawlomatic_Main_Settings['price_end'];
}
if (isset($crawlomatic_Main_Settings['d_sep']) && $crawlomatic_Main_Settings['d_sep'] != '' && isset($crawlomatic_Main_Settings['t_sep']) && $crawlomatic_Main_Settings['t_sep'] != '')
{
$d_sep = $crawlomatic_Main_Settings['d_sep'];
$t_sep = $crawlomatic_Main_Settings['t_sep'];
$price_t = number_format($item_regular_price_multi, 2, $d_sep, $t_sep);
if(!empty($price_t))
{
$item_regular_price_multi = $price_t;
}
if(isset($items[$iloop]['regular_price']))
{
$price_x = number_format($items[$iloop]['regular_price'], 2, $d_sep, $t_sep);
if(!empty($price_x))
{
$items[$iloop]['regular_price'] = $price_x;
}
}
}
}
else
{
$item_regular_price_multi = '';
}
if($item_price_multi == '' && $item_regular_price_multi != '')
{
$item_price_multi = $item_regular_price_multi;
}
$img_found = false;
$update_meta_id = '';
if ($count > intval($max) && !isset($items[$iloop]['variant_parent']))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Stopping posting, maximum count reached: ' . $max);
}
break;
}
$url = $items[$iloop]['url'];
$title = $items[$iloop]['title'];
if (!isset($crawlomatic_Main_Settings['unchanged_urls']) || $crawlomatic_Main_Settings['unchanged_urls'] != 'on')
{
$url1 = preg_replace('{#(.*)}s', '', $url);
if($url1 !== null)
{
$url = $url1;
}
}
if (!isset($crawlomatic_Main_Settings['title_duplicates']) || $crawlomatic_Main_Settings['title_duplicates'] != 'on')
{
if(has_filter('crawlomatic_filter_dup_check'))
{
$continue_filter = false;
$continue_filter = apply_filters( 'crawlomatic_filter_dup_check', $url );
if($continue_filter === true)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Duplicate checking filter, skipping: ' . $url);
}
continue;
}
}
else
{
if (isset($posted_items[$url])) {
if ($update_ex == true) {
$update_meta_id = $posted_items[$url];
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Post URL found to be already published: ' . $url);
}
continue;
}
}
}
}
else
{
if(has_filter('crawlomatic_filter_dup_check'))
{
$continue_filter = false;
$continue_filter = apply_filters( 'crawlomatic_filter_dup_check', $title );
if($continue_filter === true)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Duplicate checking filter, skipping: ' . $title);
}
continue;
}
}
else
{
if (isset($posted_items[$title])) {
if ($update_ex == true) {
$update_meta_id = $posted_items[$title];
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Post found to be already published: ' . $title);
}
continue;
}
}
}
}
if(isset($crawlomatic_Main_Settings['shortest_api']) && $crawlomatic_Main_Settings['shortest_api'] != '')
{
$short_url = crawlomatic_url_handle($url, $crawlomatic_Main_Settings['shortest_api']);
}
else
{
$short_url = $url;
}
$content = $items[$iloop]['content'];
if($limit_content_word_count != '' && is_numeric($limit_content_word_count))
{
$content = crawlomatic_custom_wp_trim_excerpt($content, $limit_content_word_count, $short_url, $read_more);
}
if (trim($lazy_tag) != '' && trim($lazy_tag) != 'src' && strstr($content, trim($lazy_tag)) !== false) {
$lazy_tag = trim($lazy_tag);
$lazy_found = false;
preg_match_all('{<img .*?>}s', $content, $imgsMatchs);
if(isset($imgsMatchs[0][0]))
{
$imgsMatchs = $imgsMatchs[0];
foreach($imgsMatchs as $imgMatch){
if(stristr($imgMatch, $lazy_tag )){
$newImg = $imgMatch;
$newImg1 = preg_replace('{ src=".*?"}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
$newImg = str_replace($lazy_tag, 'src', $newImg);
$content = str_replace($imgMatch, $newImg, $content);
$lazy_found = true;
}
}
}
if($lazy_found == false)
{
$content = str_replace(trim($lazy_tag), 'src', $content);
}
preg_match_all('{<iframe .*?>}s', $content, $imgsMatchs);
if(isset($imgsMatchs[0][0]))
{
$imgsMatchs = $imgsMatchs[0];
foreach($imgsMatchs as $imgMatch){
if(stristr($imgMatch, $lazy_tag )){
$newImg = $imgMatch;
$newImg1 = preg_replace('{ src=["\'].*?[\'"]}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
if(stristr($lazy_tag, 'srcset') !== false)
{
$newImg1 = preg_replace('{\ssrcset=["\'].*?[\'"]}', '', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
$newImg = str_replace($lazy_tag, 'srcset', $newImg);
preg_match_all('#srcset=[\'"](?:([^"\'\s,]+)\s*(?:\s+\d+[wx])(?:,\s*)?)+["\']#', $newImg, $imgma);
if(isset($imgma[1][0]))
{
$newImg1 = preg_replace('#<img#', '<img src="' . $imgma[1][0] . '"', $newImg);
if($newImg1 !== null)
{
$newImg = $newImg1;
}
}
}
else
{
$newImg = str_replace($lazy_tag, 'src', $newImg);
}
$content = str_replace($imgMatch, $newImg, $content);
}
}
}
}
else
{
$content = crawlomatic_lazy_loading_auto_fix($content);
}
if ((isset($crawlomatic_Main_Settings['strip_content_links']) && $crawlomatic_Main_Settings['strip_content_links'] == 'on') || $strip_links == '1') {
$content = crawlomatic_strip_links($content);
}
if ((isset($crawlomatic_Main_Settings['strip_internal_content_links']) && $crawlomatic_Main_Settings['strip_internal_content_links'] == 'on')) {
$content = crawlomatic_strip_internal_links($content, $url);
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$content = crawlomatic_replace_cyrilic($content);
$title = crawlomatic_replace_cyrilic($title);
}
if($limit_title_word_count != '' && is_numeric($limit_title_word_count))
{
$title = wp_trim_words($title, intval($limit_title_word_count), '');
}
if (isset($crawlomatic_Main_Settings['title_duplicates']) && $crawlomatic_Main_Settings['title_duplicates'] == 'on')
{
$round_found = false;
foreach($post_array as $parr)
{
if($parr === $items[$iloop]['title'])
{
$round_found = true;
break;
}
}
if($round_found == true)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Duplication found: ' . $items[$iloop]['title']);
}
continue;
}
}
if (isset($crawlomatic_Main_Settings['strip_scripts']) && $crawlomatic_Main_Settings['strip_scripts'] == 'on') {
$content1 = preg_replace('{<script[\s\S]*?\/\s?script>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{<ins.*?ins>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{<ins.*?>}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{\(adsbygoogle.*?\);}s', '', $content);
if($content1 !== null)
{
$content = $content1;
}
}
$my_url = parse_url($url);
if(isset($my_url['host']))
{
$my_host = $my_url['host'];
}
else
{
$my_host = '';
}
preg_match_all('{src[\s]*=[\s]*["|\'](.*?)["|\']}is', $content , $matches);
$img_srcs = ($matches[1]);
$replaced_links_img = array();
foreach ($img_srcs as $img_src){
$original_src = $img_src;
$img_src_rel = crawlomatic_fix_single_link($img_src, $url);
if($img_src_rel != $img_src)
{
if(!in_array($img_src, $replaced_links_img))
{
$replaced_links_img[] = $img_src;
$content = str_replace($img_src, $img_src_rel, $content);
}
}
}
if (!isset($crawlomatic_Main_Settings['keep_srcset']) || $crawlomatic_Main_Settings['keep_srcset'] != 'on') {
$content1 = preg_replace('{\ssrcset=".*?"}', ' ', $content);
if($content1 !== null)
{
$content = $content1;
}
$content1 = preg_replace('{\ssizes=".*?"}', ' ', $content);
if($content1 !== null)
{
$content = $content1;
}
}
$content = html_entity_decode($content, ENT_NOQUOTES | ENT_HTML5) ;
if($check_words != 'title')
{
if($check_only_content == '1')
{
if($required_words != '')
{
$required_found = false;
$req_list = explode(',', trim($required_words, ','));
if($require_one == '1')
{
$required_found = false;
// "Require one" mode (content): the item qualifies as soon as any listed word occurs in
// $content, compared case-insensitively. Both branches must test for a hit (!== false).
// The non-mbstring fallback used to test "=== false", which flagged the item as matching
// precisely when the word was absent.
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($content, $rl) !== false)
{
$required_found = true;
break;
}
}
else
{
// Byte-wise fallback when the mbstring extension is not loaded.
if(stristr($content, $rl) !== false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('No required word found (content), skipping: ' . $url);
}
continue;
}
}
else
{
$need_break = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($content, $rl) === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Required word not found (content), skipping: ' . $url);
}
$need_break = true;
break;
}
}
else
{
if(stristr($content, $rl) === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Required word not found (content), skipping: ' . $url);
}
$need_break = true;
break;
}
}
}
if($need_break === true)
{
continue;
}
}
}
if($banned_words != '')
{
$ban_list = explode(',', trim($banned_words, ','));
$ban_found = false;
foreach($ban_list as $bl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($content, $bl) !== false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Banned word detected (content) [' . $bl . '], skipping it\'s importing: ' . $url);
}
$ban_found = true;
break;
}
}
else
{
if(stristr($content, $bl) !== false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Banned word detected (content) [' . $bl . '], skipping it\'s importing: ' . $url);
}
$ban_found = true;
break;
}
}
}
if($ban_found === true)
{
continue;
}
}
}
}
if($check_words == 'title' || $check_words == 'both')
{
if($required_words != '')
{
$required_found = false;
$req_list = explode(',', trim($required_words, ','));
if($require_one == '1')
{
$required_found = false;
// "Require one" mode (title): the item qualifies as soon as any listed word occurs in
// $title, compared case-insensitively. Both branches must test for a hit (!== false).
// The non-mbstring fallback used to test "=== false", which flagged the item as matching
// precisely when the word was absent.
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($title, $rl) !== false)
{
$required_found = true;
break;
}
}
else
{
// Byte-wise fallback when the mbstring extension is not loaded.
if(stristr($title, $rl) !== false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('No required word found (title), skipping: ' . $url);
}
continue;
}
}
else
{
$need_break = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($title, $rl) === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Required word not found (title), skipping: ' . $url);
}
$need_break = true;
break;
}
}
else
{
if(stristr($title, $rl) === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Required word not found (title), skipping: ' . $url);
}
$need_break = true;
break;
}
}
}
if($need_break === true)
{
continue;
}
}
}
if($banned_words != '')
{
$ban_list = explode(',', trim($banned_words, ','));
$ban_found = false;
foreach($ban_list as $bl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($title, $bl) !== false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Banned word detected (title) [' . $bl . '], skipping it\'s importing: ' . $url);
}
$ban_found = true;
break;
}
}
else
{
if(stristr($title, $bl) !== false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Banned word detected (title) [' . $bl . '], skipping it\'s importing: ' . $url);
}
$ban_found = true;
break;
}
}
}
if($ban_found === true)
{
continue;
}
}
}
// Global banned-word filter: the comma separated 'global_ban_words' setting is matched
// case-insensitively against the item content and title; the first hit skips the item.
// Blank list entries (trailing or doubled commas) are ignored. Since PHP 8.0 stripos()
// accepts an empty needle and reports a match at offset 0, so a blank entry would
// otherwise ban every single item.
if(isset($crawlomatic_Main_Settings['global_ban_words']) && $crawlomatic_Main_Settings['global_ban_words'] != '')
{
$continue = false;
$banned_list = explode(',', $crawlomatic_Main_Settings['global_ban_words']);
foreach ($banned_list as $banned_word) {
if (trim($banned_word) === '') {
continue;
}
if (stripos($content, trim($banned_word)) !== FALSE) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it\'s content contains global banned word: ' . $banned_word);
}
$continue = true;
break;
}
if (stripos($title, trim($banned_word)) !== FALSE) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it\'s title contains global banned word: ' . $banned_word);
}
$continue = true;
break;
}
}
// $continue carries the verdict out of the inner foreach so the item loop can skip this item.
if ($continue === true) {
continue;
}
}
// Global required-word filter: the comma separated 'global_req_words' setting is matched
// case-insensitively against the item content and title.
// Blank list entries (trailing or doubled commas) are ignored in both modes. An empty
// needle makes stripos() report a match on PHP 8+ and fail with a warning on older
// versions, so a blank entry would either satisfy the filter for every item or reject
// every item, depending on the PHP version.
if(isset($crawlomatic_Main_Settings['global_req_words']) && $crawlomatic_Main_Settings['global_req_words'] != '')
{
if(isset($crawlomatic_Main_Settings['require_only_one']) && $crawlomatic_Main_Settings['require_only_one'] == 'on')
{
// "Any word" mode: skip the item only when at least one real word is configured
// and none of the configured words occurs in the content or the title.
$continue = false;
$required_list = explode(',', $crawlomatic_Main_Settings['global_req_words']);
foreach ($required_list as $required_word) {
if (trim($required_word) === '') {
continue;
}
// A real word exists, so the item must now prove a match.
$continue = true;
if (stripos($content, trim($required_word)) !== FALSE || stripos($title, trim($required_word)) !== FALSE) {
$continue = false;
break;
}
}
if ($continue === true) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it\'s content doesn\'t contain global required words.');
}
continue;
}
}
else
{
// "All words" mode: skip the item as soon as one real word is missing from both
// the content and the title.
$continue = false;
$required_list = explode(',', $crawlomatic_Main_Settings['global_req_words']);
foreach ($required_list as $required_word) {
if (trim($required_word) === '') {
continue;
}
if (stripos($content, trim($required_word)) === FALSE && stripos($title, trim($required_word)) === FALSE) {
$continue = true;
break;
}
}
if ($continue === true) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it\'s content doesn\'t contain global required words.');
}
continue;
}
}
}
if ($get_css == '1') {
add_action('wp_enqueue_scripts', 'crawlomatic_wp_custom_css_files', 10, 2);
$htmlcontent = crawlomatic_get_web_page($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
if ($htmlcontent !== FALSE) {
preg_match_all('/"([^"]+?\.css)"/', $htmlcontent, $matches);
$matches = $matches[0];
$matches = array_unique($matches);
$cont = 0;
foreach ($matches as $match) {
$match = trim(htmlspecialchars_decode($match), '"');
$match = crawlomatic_fix_single_link($match, $url);
if (!crawlomatic_url_exists($match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
$tmp_match = 'http:' . $match;
if (!crawlomatic_url_exists($tmp_match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
$parts = explode('/', $url);
$dir = '';
for ($i = 0; $i < count($parts) - 1; $i++) {
$dir .= $parts[$i] . "/";
}
$tmp_match = $dir . trim($match, '/');
if (!crawlomatic_url_exists($tmp_match, $use_proxy, $crawlomatic_Main_Settings, $custom_user_agent, $custom_cookies, $user_pass)) {
continue;
} else {
$match = $tmp_match;
}
} else {
$match = $tmp_match;
}
}
$css_temp = crawlomatic_get_web_page($match, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
if ($css_temp === FALSE) {
continue;
}
$css_cont .= wp_strip_all_tags($css_temp) . ' ';
}
}
}
$description = crawlomatic_getExcerpt($content);
if($items[$iloop]['crawled_date'] === true)
{
$date = $items[$iloop]['date'];
}
else
{
$postdatex = gmdate("Y-m-d H:i:s", intval($init_date));
$date = $postdatex;
$init_date = $init_date - 1;
}
if($date_index != '')
{
$old_d = strtotime($date);
if($old_d !== false)
{
$newtime = $old_d + ($date_index * 60 * 60);
$date = date("Y-m-d H:i:s", $newtime);
}
}
if (isset($crawlomatic_Main_Settings['skip_old']) && $crawlomatic_Main_Settings['skip_old'] == 'on' && isset($crawlomatic_Main_Settings['skip_year']) && $crawlomatic_Main_Settings['skip_year'] !== '' && isset($crawlomatic_Main_Settings['skip_month']) && isset($crawlomatic_Main_Settings['skip_day'])) {
$old_date = $crawlomatic_Main_Settings['skip_day'] . '-' . $crawlomatic_Main_Settings['skip_month'] . '-' . $crawlomatic_Main_Settings['skip_year'];
$time_date = strtotime($date);
$time_old_date = strtotime($old_date);
if ($time_date !== false && $time_old_date !== false) {
if ($time_date < $time_old_date) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it is older than ' . $old_date . ' - posted on ' . $date);
}
continue;
}
}
}
$extra_categories = '';
if(is_array($items[$iloop]['categories']))
{
foreach ($items[$iloop]['categories'] as $category)
{
$extra_categories .= $category . ',';
}
$extra_categories = trim($extra_categories, ',');
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$extra_categories = crawlomatic_replace_cyrilic($extra_categories);
}
$my_post = array();
$my_post['attach_ids'] = array();
$my_post['update_meta_id'] = $update_meta_id;
$my_post['crawlomatic_enable_pingbacks'] = $enable_pingback;
$my_post['post_type'] = $post_type;
$my_post['comment_status'] = $accept_comments;
if (isset($crawlomatic_Main_Settings['draft_first']) && $crawlomatic_Main_Settings['draft_first'] == 'on')
{
if($post_status == 'publish')
{
$draft_me = true;
$my_post['post_status'] = 'draft';
}
else
{
$my_post['post_status'] = $post_status;
}
}
else
{
$my_post['post_status'] = $post_status;
}
if($user_name_type == 'rnd-crawlomatic')
{
$randid = crawlomatic_display_random_user();
if($randid === false)
{
$post_user_set = $dff_u;
}
else
{
$post_user_set = $randid->ID;
}
$my_post['post_author'] = $post_user_set;
}
elseif($user_name_type == 'feed-crawlomatic')
{
if($items[$iloop]['author'] != '')
{
if(username_exists( sanitize_user($items[$iloop]['author']) ))
{
$user_id_t = get_user_by('login', sanitize_user($items[$iloop]['author']));
if($user_id_t)
{
$post_user_set = $user_id_t->ID;
}
else
{
$post_user_set = $dff_u;
}
}
else
{
// Build a throw-away 8 character password for the account created from the scraped
// author name. The characters are picked with wp_rand() rather than rand(): rand() is
// not suitable for secrets, and this password protects an account that is promoted to
// the 'editor' role right after creation.
$palphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^*()-+=_?><,.;:}{][';
$ppass = '';
$alphaLength = strlen($palphabet) - 1;
for ($ipass = 0; $ipass < 8; $ipass++)
{
$npass = wp_rand(0, $alphaLength);
$ppass .= $palphabet[$npass];
}
$curr_id = wp_create_user(sanitize_user($items[$iloop]['author']), $ppass, crawlomatic_generate_random_email());
if ( is_int($curr_id) )
{
$u = new WP_User($curr_id);
$u->remove_role('subscriber');
$u->add_role('editor');
$post_user_set = $curr_id;
update_user_meta($curr_id,'last_name', $items[$iloop]['author']);
}
else
{
$post_user_set = $dff_u;
}
}
}
else
{
$post_user_set = $dff_u;
}
$my_post['post_author'] = $post_user_set;
}
elseif($user_name_type == 'url-crawlomatic')
{
if($my_host != '')
{
$my_host = preg_replace('#^www\.(.+)#i', '$1', $my_host);
if(username_exists( sanitize_user($my_host) ))
{
$user_id_t = get_user_by('login', sanitize_user($my_host));
if($user_id_t)
{
$post_user_set = $user_id_t->ID;
}
else
{
$post_user_set = $dff_u;
}
}
else
{
// Build a throw-away 8 character password for the account created from the source
// host name. The characters are picked with wp_rand() rather than rand(): rand() is
// not suitable for secrets, and this password protects an account that is promoted to
// the 'editor' role right after creation.
$palphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^*()-+=_?><,.;:}{][';
$ppass = '';
$alphaLength = strlen($palphabet) - 1;
for ($ipass = 0; $ipass < 8; $ipass++)
{
$npass = wp_rand(0, $alphaLength);
$ppass .= $palphabet[$npass];
}
$curr_id = wp_create_user(sanitize_user($my_host), $ppass, crawlomatic_generate_random_email());
if ( is_int($curr_id) )
{
$u = new WP_User($curr_id);
$u->remove_role('subscriber');
$u->add_role('editor');
$post_user_set = $curr_id;
update_user_meta($curr_id,'last_name', $my_host);
}
else
{
$post_user_set = $dff_u;
}
}
}
else
{
$post_user_set = $dff_u;
}
$my_post['post_author'] = $post_user_set;
}
else
{
$my_post['post_author'] = $post_user_name;
}
$item_tags = '';
if(is_array($items[$iloop]['tags']))
{
foreach ($items[$iloop]['tags'] as $xtag)
{
$item_tags .= $xtag . ',';
}
$item_tags = trim($item_tags, ',');
}
$item_download = array();
$my_post['post_gallery'] = $items[$iloop]['gallery'];
if(count($items[$iloop]['download_remote']) > 0)
{
$item_download = $items[$iloop]['download_remote'];
$my_post['download_local'] = $items[$iloop]['download_local'];
}
else
{
$my_post['download_local'] = array();
}
if (isset($crawlomatic_Main_Settings['convert_cyrilic']) && $crawlomatic_Main_Settings['convert_cyrilic'] == "on") {
$item_tags = crawlomatic_replace_cyrilic($item_tags);
}
if ($can_create_tag == '1') {
$my_post['tags_input'] = ($item_create_tag != '' ? $item_create_tag . ',' : '') . $item_tags;
} else if ($item_create_tag != '') {
$my_post['tags_input'] = $item_create_tag;
}
$orig_content = '';
$my_post['crawlomatic_post_url'] = $short_url;
$my_post['crawlomatic_post_orig_url'] = $url;
$my_post['crawlomatic_post_date'] = $date;
$get_img = '';
if($royalty_free == '1')
{
if(isset($crawlomatic_Main_Settings['textrazor_key']) && trim($crawlomatic_Main_Settings['textrazor_key']) != '')
{
try
{
if(!class_exists('TextRazor'))
{
require_once(dirname(__FILE__) . "/res/TextRazor.php");
}
TextRazorSettings::setApiKey(trim($crawlomatic_Main_Settings['textrazor_key']));
$textrazor = new TextRazor();
$textrazor->addExtractor('entities');
$response = $textrazor->analyze($title);
if (isset($response['response']['entities']))
{
foreach ($response['response']['entities'] as $entity)
{
$query_words = '';
if(isset($entity['entityEnglishId']))
{
$query_words = $entity['entityEnglishId'];
}
else
{
$query_words = $entity['entityId'];
}
if($query_words != '')
{
$get_img = crawlomatic_get_free_image($crawlomatic_Main_Settings, $query_words, $img_attr, 10);
if($get_img == false)
{
$get_img = '';
}
if(!empty($get_img))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Royalty Free Featured Image Generated with help of TextRazor (kw: "' . $query_words . '"): ' . $get_img);
}
break;
}
}
}
}
}
catch(Exception $e)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to search for keywords using TextRazor: ' . $e->getMessage());
}
}
}
if(empty($get_img))
{
$keyword_class = new Crawlomatic_keywords();
$query_words = $keyword_class->keywords($title, 2);
$get_img = crawlomatic_get_free_image($crawlomatic_Main_Settings, $query_words, $img_attr, 10);
if($get_img == '' || $get_img === false)
{
if(isset($crawlomatic_Main_Settings['bimage']) && $crawlomatic_Main_Settings['bimage'] == 'on')
{
$query_words = $keyword_class->keywords($title, 1);
$get_img = crawlomatic_get_free_image($crawlomatic_Main_Settings, $query_words, $img_attr, 20);
if($get_img == '' || $get_img === false)
{
if(isset($crawlomatic_Main_Settings['no_orig']) && $crawlomatic_Main_Settings['no_orig'] == 'on')
{
$get_img = '';
}
else
{
$get_img = $items[$iloop]['image'];
}
}
}
else
{
if(isset($crawlomatic_Main_Settings['no_orig']) && $crawlomatic_Main_Settings['no_orig'] == 'on')
{
$get_img = '';
}
else
{
$get_img = $items[$iloop]['image'];
}
}
}
}
}
else
{
$get_img = $items[$iloop]['image'];
}
if($get_img != '')
{
$img_found = true;
$get_img = crawlomatic_fix_single_link($get_img, $url);
}
if(substr($get_img, 0, 2) === "//")
{
if(substr($url, 0, 5) === "https")
{
$get_img = 'https:' . $get_img;
}
else
{
$get_img = 'http:' . $get_img;
}
}
if($image_type == 'gallery')
{
if ($gallery_regex !== '')
{
// One pattern / replacement per line. The line-break alternatives are written as PCRE
// escapes instead of raw CR/LF bytes inside the literal, so that re-saving this file with
// different line endings cannot change which breaks are recognised (textarea input
// normally arrives with CRLF).
$xstrip_by_regex = preg_split('/\r\n|\r|\n/', $gallery_regex);
$xreplace_regex = preg_split('/\r\n|\r|\n/', $replace_gallery_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_cont_gallery = preg_replace("~" . $sbr . "~i", $repreg, $get_img);
if($temp_cont_gallery !== NULL)
{
$get_img = $temp_cont_gallery;
}
}
}
}
if (isset($crawlomatic_Main_Settings['skip_image_names']) && $crawlomatic_Main_Settings['skip_image_names'] != '' && $get_img != '')
{
$need_to_continue = false;
$skip_images = explode(',', $crawlomatic_Main_Settings['skip_image_names']);
foreach($skip_images as $ski)
{
if(crawlomatic_stringMatchWithWildcard($get_img, trim($ski)))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it has excluded image name: ' . $get_img . ' - ' . $ski);
}
$need_to_continue = true;
break;
}
}
if($need_to_continue == true)
{
continue;
}
}
if ($featured_image == '1' && ($skip_no_image == '1' || (isset($crawlomatic_Main_Settings['skip_no_img']) && $crawlomatic_Main_Settings['skip_no_img'] == 'on')) && $img_found == false) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($title) . '", because it has no detected image file attached');
}
continue;
}
if ($strip_by_id != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_id);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_id) {
$ret = $html_dom_original_html->find('*[id="'.trim($strip_id).'"]');
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_id) {
if(trim($strip_id) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByID($content, trim($strip_id));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($strip_by_class != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_class);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_class) {
if(trim($strip_class) == '')
{
continue;
}
$ret = $html_dom_original_html->find('*[class="'.trim($strip_class).'"]');
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_class) {
if(trim($strip_class) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByClass($content, trim($strip_class));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($strip_by_xpath != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
// One selector per line. The line-break alternatives are written as PCRE escapes instead
// of raw CR/LF bytes inside the literal, so that re-saving this file with different line
// endings cannot leave a stray "\r" glued to each selector.
$strip_by_xpath_arr = preg_split('/\r\n|\r|\n/', $strip_by_xpath);
foreach($strip_by_xpath_arr as $fxx)
{
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
$ret = $html_dom_original_html->find($fxx);
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
$content_r = crawlomatic_removeTagByXPath($content, trim($fxx));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($strip_html_by_xpath != '') {
// One selector per line. The line-break alternatives are written as PCRE escapes instead
// of raw CR/LF bytes inside the literal, so that re-saving this file with different line
// endings cannot leave a stray "\r" glued to each selector.
$strip_html_by_xpath_arr = preg_split('/\r\n|\r|\n/', $strip_html_by_xpath);
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
foreach($strip_html_by_xpath_arr as $fx)
{
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
$ret = $html_dom_original_html->find($fx);
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = strip_tags($itm->outertext) ;
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
$content_r = crawlomatic_removeHTMLByXPath($content, trim($fx));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($strip_by_tag != '') {
require_once (dirname(__FILE__) . "/res/simple_html_dom.php");
$strip_list = explode(',', $strip_by_tag);
$extractok = false;
$html_dom_original_html = crawlomatic_str_get_html($content);
if($html_dom_original_html !== false && method_exists($html_dom_original_html, 'find')){
foreach ($strip_list as $strip_tag) {
$strip_tag = trim($strip_tag);
if($strip_tag != '')
{
$ret = $html_dom_original_html->find($strip_tag);
foreach ($ret as $itm ) {
$extractok = true;
$itm->outertext = '' ;
}
}
}
$content = $html_dom_original_html->save();
$html_dom_original_html->clear();
unset($html_dom_original_html);
}
if($extractok == false)
{
foreach ($strip_list as $strip_tag) {
if(trim($strip_tag) == '')
{
continue;
}
$content_r = crawlomatic_removeTagByTag($content, trim($strip_tag));
if($content_r !== false)
{
$content = $content_r;
}
}
}
}
if ($only_text == '1') {
$content = crawlomatic_strip_html_tags($content, $allow_html_tags);
}
$content = crawlomatic_fix_links($content, $url);
$postdate = strtotime($date);
if($postdate !== FALSE)
{
$postdate = gmdate("Y-m-d H:i:s", intval($postdate));
}
if($postdate !== FALSE)
{
if($items[$iloop]['crawled_date'] === true)
{
$my_post['post_date_gmt'] = $postdate;
}
else
{
$my_post['post_date_gmt'] = $postdate;
}
}
if(isset($items[$iloop]['custom_shortcodes']) && is_array($items[$iloop]['custom_shortcodes']))
{
$custom_shortcodes_arr = $items[$iloop]['custom_shortcodes'];
}
else
{
$custom_shortcodes_arr = array();
}
if($postdate === false)
{
$postdate = $date;
}
if($content_percent != '' && is_numeric($content_percent) && $content_percent != 100)
{
$temp_t = crawlomatic_strip_html_tags($content);
$temp_t = str_replace(' ',"",$temp_t);
$ccount = str_word_count($temp_t);
if($ccount > 10)
{
$str_count = strlen($content);
$leave_cont = round($str_count * $content_percent / 100);
$content = crawlomatic_substr_close_tags($content, $leave_cont);
}
else
{
$ccount = crawlomatic_count_unicode_words($temp_t);
if($ccount > 10)
{
$str_count = strlen($content);
$leave_cont = round($str_count * $content_percent / 100);
$content = crawlomatic_substr_close_tags($content, $leave_cont);
}
}
}
$screenimageURL = '';
$screens_attach_id = '';
if(isset($items[$iloop]['screen_image']) && $items[$iloop]['screen_image'] != '')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
$screenimageURL = $items[$iloop]['screen_image'];
}
}
else
{
if (isset($crawlomatic_Main_Settings['headless_screen']) && $crawlomatic_Main_Settings['headless_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_comm .= '--proxy=' . trim($prx[$randomness]) . ' ';
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_comm .= '--proxy-auth=' . trim($prx_auth[$randomness]) . ' ';
}
}
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
$screenshotimg = crawlomatic_get_screenshot_PuppeteerAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', $request_delay, $scripter, $local_storage, $h, $w, $auto_captcha, $enable_adblock, $clickelement);
if($screenshotimg !== false)
{
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name . '.jpg';
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
$is_fail = $wp_filesystem->put_contents($screenimageName, $screenshotimg);
if($is_fail === false)
{
crawlomatic_log_to_file('Error in writing screenshot to file: ' . $screenimageName);
}
else
{
$wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment($attachment, $screenimageName);
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata($screens_attach_id, $screenimageName);
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
}
}
}
}
elseif (isset($crawlomatic_Main_Settings['phantom_screen']) && $crawlomatic_Main_Settings['phantom_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if(function_exists('shell' . '_exec'))
{
$disabled = explode(',', ini_get('disable_functions'));
if(!in_array('shell' . '_exec', $disabled))
{
if (isset($crawlomatic_Main_Settings['phantom_path']) && $crawlomatic_Main_Settings['phantom_path'] != '')
{
$phantomjs_comm = $crawlomatic_Main_Settings['phantom_path'] . ' ';
}
else
{
$phantomjs_comm = 'phantomjs ';
}
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name;
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_comm .= '--proxy=' . trim($prx[$randomness]) . ' ';
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_comm .= '--proxy-auth=' . trim($prx_auth[$randomness]) . ' ';
}
}
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
$shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
$cmdResult = $shefunc($phantomjs_comm . '"' . dirname(__FILE__) .'/res/phantomjs/phantom-screenshot.js"' . ' "'. dirname(__FILE__) . '" "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '" 2>&1');
if($cmdResult === NULL || $cmdResult == '' || trim($cmdResult) === 'timeout' || stristr($cmdResult, 'sh: phantomjs: command not found') !== false)
{
$screenimageURL = '';
crawlomatic_log_to_file('Error in phantomjs screenshot: exec: ' . $phantomjs_comm . '"' . dirname(__FILE__) .'/res/phantomjs/phantom-screenshot.js"' . ' "'. dirname(__FILE__) . '" "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '" , reterr: ' . $cmdResult);
}
else
{
if($wp_filesystem->exists($screenimageName))
{
$wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment( $attachment, $screenimageName . '.jpg' );
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata( $screens_attach_id, $screenimageName . '.jpg' );
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
}
else
{
crawlomatic_log_to_file('Error in phantomjs screenshot not found: exec: ' . $phantomjs_comm . '"' . dirname(__FILE__) .'/res/phantomjs/phantom-screenshot.js"' . ' "'. dirname(__FILE__) . '" "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '" , reterr: ' . $cmdResult);
}
}
}
}
}
}
elseif (isset($crawlomatic_Main_Settings['puppeteer_screen']) && $crawlomatic_Main_Settings['puppeteer_screen'] == 'on')
{
if($attach_screen == '1' || (strstr($post_content, '%%item_show_screenshot%%') !== false || strstr($post_content, '%%item_screenshot_url%%') !== false || strstr($custom_fields, '%%item_show_screenshot%%') !== false || strstr($custom_fields, '%%item_screenshot_url%%') !== false || strstr($custom_tax, '%%item_show_screenshot%%') !== false || strstr($custom_tax, '%%item_screenshot_url%%') !== false))
{
if(function_exists('shell' . '_exec'))
{
$disabled = explode(',', ini_get('disable_functions'));
if(!in_array('shell' . '_exec', $disabled))
{
$phantomjs_comm = 'node ';
if (isset($crawlomatic_Main_Settings['screenshot_height']) && $crawlomatic_Main_Settings['screenshot_height'] != '')
{
$h = esc_attr($crawlomatic_Main_Settings['screenshot_height']);
}
else
{
$h = '0';
}
if (isset($crawlomatic_Main_Settings['screenshot_width']) && $crawlomatic_Main_Settings['screenshot_width'] != '')
{
$w = esc_attr($crawlomatic_Main_Settings['screenshot_width']);
}
else
{
$w = '1920';
}
if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
{
$phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
}
else
{
$phantomjs_timeout = 'default';
}
if ($w < 350) {
$w = 350;
}
if ($w > 1920) {
$w = 1920;
}
$upload_dir = wp_upload_dir();
$dir_name = $upload_dir['basedir'] . '/crawlomatic-files';
$dir_url = $upload_dir['baseurl'] . '/crawlomatic-files';
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if (!$wp_filesystem->exists($dir_name)) {
wp_mkdir_p($dir_name);
}
$screen_name = uniqid();
$screenimageName = $dir_name . '/' . $screen_name . '.jpg';
$screenimageURL = $dir_url . '/' . $screen_name . '.jpg';
$phantomjs_proxcomm = '"null"';
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$phantomjs_proxcomm = '"' . trim($prx[$randomness]);
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$phantomjs_proxcomm .= '~~~' . trim($prx_auth[$randomness]);
}
}
$phantomjs_proxcomm .= '"';
}
if($custom_user_agent == '')
{
$custom_user_agent = 'default';
}
if($custom_cookies == '')
{
$custom_cookies = 'default';
}
if($user_pass == '')
{
$user_pass = 'default';
}
$shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
$cmdResult = $shefunc($phantomjs_comm . '"' . dirname(__FILE__) .'/res/puppeteer/screenshot.js"' . ' "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' ' . $phantomjs_proxcomm . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . $phantomjs_timeout . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '" 2>&1');
if(stristr($cmdResult, 'sh: node: command not found') !== false || stristr($cmdResult, 'throw err;') !== false)
{
$screenimageURL = '';
crawlomatic_log_to_file('Error in puppeteer screenshot: exec: ' . $phantomjs_comm . '"' . dirname(__FILE__) .'/res/puppeteer/screenshot.js"' . ' "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' ' . $phantomjs_proxcomm . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . $phantomjs_timeout . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '", reterr: ' . $cmdResult);
}
else
{
if($wp_filesystem->exists($screenimageName))
{
$wp_filetype = wp_check_filetype( $screen_name . '.jpg', null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => sanitize_file_name( $screen_name . '.jpg' ),
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment( $attachment, $screenimageName);
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata( $screens_attach_id, $screenimageName);
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
}
else
{
crawlomatic_log_to_file('Error in puppeteer screenshot not found: exec: ' . $phantomjs_comm . '"' . dirname(__FILE__) .'/res/puppeteer/screenshot.js"' . ' "' . $url . '" "' . $screenimageName . '" ' . $w . ' ' . $h . ' ' . $phantomjs_proxcomm . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . $phantomjs_timeout . '" "' . addslashes($scripter) . '" "' . addslashes($local_storage) . '", reterr: ' . $cmdResult);
}
}
}
}
}
}
}
if (isset($crawlomatic_Main_Settings['disable_excerpt']) && $crawlomatic_Main_Settings['disable_excerpt'] == "on")
{
$my_post['post_excerpt'] = '';
}
else
{
if(isset($items[$iloop]['excerpt']) && trim($items[$iloop]['excerpt']) !== '')
{
$arr = crawlomatic_spin_and_translate('test', $items[$iloop]['excerpt'], $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$new_post_excerpt = $arr[1];
if(trim($replace_words) != '')
{
$replace_arr = explode(',', trim($replace_words));
$replace_arr = array_map('trim', $replace_arr);
foreach($replace_arr as $rex)
{
$repla_parts = explode('|', $rex);
if(!isset($repla_parts[1]))
{
continue;
}
$new_post_excerpt = str_replace($repla_parts[0], $repla_parts[1], $new_post_excerpt);
}
}
$my_post['post_excerpt'] = trim($new_post_excerpt);
$description = trim($new_post_excerpt);
}
}
if ($limit_word_count !== "") {
$content = crawlomatic_custom_wp_trim_excerpt($content, $limit_word_count, $short_url, $read_more);
}
if (isset($crawlomatic_Main_Settings['only_imported']) && $crawlomatic_Main_Settings['only_imported'] == 'on')
{
$arr = crawlomatic_spin_and_translate($title, $content, $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$title = $arr[0];
$content = $arr[1];
}
$new_post_content = crawlomatic_replaceContentShortcodes($post_content, $title, $content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
$new_post_title = crawlomatic_replaceContentShortcodes($post_title, $title, $content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(trim($replace_words) != '')
{
$replace_arr = explode(',', trim($replace_words));
$replace_arr = array_map('trim', $replace_arr);
foreach($replace_arr as $rex)
{
$repla_parts = explode('|', $rex);
if(!isset($repla_parts[1]))
{
continue;
}
$new_post_content = str_replace($repla_parts[0], $repla_parts[1], $new_post_content);
$new_post_title = str_replace($repla_parts[0], $repla_parts[1], $new_post_title);
}
}
if($run_raw_html != '1')
{
if ($strip_by_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $strip_by_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_regex);
$xcnt = 0;
$need_to_cont = false;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
if($skip_no_match == '1')
{
$rezu = preg_match_all($sbr, $new_post_content, $reqmatches);
if($rezu === false)
{
preg_match_all("~" . $sbr . "~i", $new_post_content, $reqmatches);
}
if(!isset($reqmatches[0][0]))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because Regex not matched: ' . $sbr);
}
$need_to_cont = true;
break;
}
else
{
if(crawlomatic_isRegularExpression("~" . $sbr . "~i") === false)
{
crawlomatic_log_to_file('Incorrect strip regex entered: ' . "~" . $sbr . "~i");
}
else
{
$temp_cont = preg_replace("~" . $sbr . "~i", $repreg, $new_post_content);
if($temp_cont !== NULL)
{
$new_post_content = $temp_cont;
}
}
}
}
else
{
$temp_cont = preg_replace("~" . $sbr . "~i", $repreg, $new_post_content);
if($temp_cont !== NULL)
{
$new_post_content = $temp_cont;
}
}
}
if($need_to_cont == true)
{
continue;
}
}
}
if (isset($crawlomatic_Main_Settings['strip_links']) && $crawlomatic_Main_Settings['strip_links'] == 'on') {
$new_post_content = crawlomatic_strip_links($new_post_content);
}
$my_post['screen_attach'] = $screens_attach_id;
$my_post['extra_categories'] = $extra_categories;
$my_post['extra_tags'] = $item_tags;
if (!isset($crawlomatic_Main_Settings['only_imported']) || $crawlomatic_Main_Settings['only_imported'] != 'on')
{
$arr = crawlomatic_spin_and_translate($new_post_title, $new_post_content, $translate, $source_lang, $use_proxy, $no_spin, $second_translate);
$new_post_title = $arr[0];
$new_post_content = $arr[1];
}
$new_post_title = html_entity_decode($new_post_title);
$new_post_content = html_entity_decode($new_post_content);
$title_count = -1;
if (isset($crawlomatic_Main_Settings['min_word_title']) && $crawlomatic_Main_Settings['min_word_title'] != '') {
$title_count = str_word_count($new_post_title);
if ($title_count < intval($crawlomatic_Main_Settings['min_word_title'])) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because title length (' . $title_count . ') < ' . $crawlomatic_Main_Settings['min_word_title']);
}
continue;
}
}
if (isset($crawlomatic_Main_Settings['max_word_title']) && $crawlomatic_Main_Settings['max_word_title'] != '') {
if ($title_count == -1) {
$title_count = str_word_count($new_post_title);
}
if ($title_count > intval($crawlomatic_Main_Settings['max_word_title'])) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because title length (' . $title_count . ') > ' . $crawlomatic_Main_Settings['max_word_title']);
}
continue;
}
}
$content_count = -1;
if (isset($crawlomatic_Main_Settings['min_word_content']) && $crawlomatic_Main_Settings['min_word_content'] != '') {
$content_count = str_word_count(crawlomatic_strip_html_tags($new_post_content));
if ($content_count < intval($crawlomatic_Main_Settings['min_word_content'])) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because content length (' . $content_count . ') < ' . $crawlomatic_Main_Settings['min_word_content']);
}
continue;
}
}
if (isset($crawlomatic_Main_Settings['max_word_content']) && $crawlomatic_Main_Settings['max_word_content'] != '') {
if ($content_count == -1) {
$content_count = str_word_count(crawlomatic_strip_html_tags($new_post_content));
}
if ($content_count > intval($crawlomatic_Main_Settings['max_word_content'])) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because content length (' . $content_count . ') > ' . $crawlomatic_Main_Settings['max_word_content']);
}
continue;
}
}
if (isset($crawlomatic_Main_Settings['replace_url']) && $crawlomatic_Main_Settings['replace_url'] !== '') {
if(strstr($crawlomatic_Main_Settings['replace_url'], '%%original_url%%') !== false)
{
$repl = str_replace('%%original_url%%', '', $crawlomatic_Main_Settings['replace_url']);
$new_post_content1 = preg_replace('/<a(.+?)href=["\']([^"\']+?)["\']([^>]*?)>/i','<a$1href="$2' . esc_html($repl) . '"$3>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
else
{
$new_post_content1 = preg_replace('/<a(.+?)href=["\']([^"\']+?)["\']([^>]*?)>/i','<a$1href="' . esc_url($crawlomatic_Main_Settings['replace_url']) . '"$3>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
}
if ($strip_images == '1') {
$new_post_content = crawlomatic_strip_images($new_post_content);
}
$copy_str = '';
$also_imgs = false;
if($copy_types != '')
{
$xcopy_types = explode(',', $copy_types);
$xcopy_types = array_map('trim', $xcopy_types);
foreach($xcopy_types as $cpa)
{
$copy_str .= trim($cpa, '.') . '|';
}
$copy_str = trim($copy_str, '|');
}
if ($copy_images == '1' || (isset($crawlomatic_Main_Settings['copy_images']) && $crawlomatic_Main_Settings['copy_images'] == 'on'))
{
$also_imgs = true;
if($copy_str == '')
{
$copy_str = 'jpg|jpeg|png|gif|jpe|tif|tiff|svg|ico|webp';
}
else
{
$copy_str .= '|jpg|jpeg|png|gif|jpe|tif|tiff|svg|ico|webp';
}
}
if($copy_str != '')
{
if($also_imgs == true)
{
$new_post_content1 = preg_replace("~\ssrcset=['\"](?:[^'\"]*)['\"]~i", ' ', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
preg_match_all('/(?:http|https|ftp|ftps)?:\/\/\S+\.(?:' . $copy_str . ')/', $new_post_content, $matches);
if(isset($matches[0][0]))
{
$matches[0] = array_unique($matches[0]);
foreach($matches[0] as $match)
{
$match = crawlomatic_fix_single_link($match, $url);
$att_id = '';
$file_path = crawlomatic_copy_image_locally($match, $use_proxy, $request_delay, $custom_user_agent, $user_pass, $custom_cookies, $att_id);
if($file_path != false)
{
$file_path = str_replace('\', '/', $file_path);
$new_post_content = str_replace($match, $file_path, $new_post_content);
if($att_id != '')
{
$my_post['attach_ids'][] = $att_id;
}
}
}
}
}
if($copy_regex != '')
{
preg_match_all($copy_regex, $new_post_content, $matches);
if(isset($matches[0][0]))
{
$matches[0] = array_unique($matches[0]);
foreach($matches[0] as $match)
{
$match = crawlomatic_fix_single_link($match, $url);
$att_id = '';
$file_path = crawlomatic_copy_image_locally($match, $use_proxy, $request_delay, $custom_user_agent, $user_pass, $custom_cookies, $att_id);
if($file_path != false)
{
$file_path = str_replace('\', '/', $file_path);
$new_post_content = str_replace($match, $file_path, $new_post_content);
if($att_id != '')
{
$my_post['attach_ids'][] = $att_id;
}
}
}
}
}
if ((isset($crawlomatic_Main_Settings['link_attributes_internal']) && $crawlomatic_Main_Settings['link_attributes_internal'] !== '') || (isset($crawlomatic_Main_Settings['link_attributes_external']) && $crawlomatic_Main_Settings['link_attributes_external'] !== ''))
{
$new_post_content = crawlomatic_add_link_tags($new_post_content);
}
if (isset($crawlomatic_Main_Settings['iframe_resize_width']) && $crawlomatic_Main_Settings['iframe_resize_width'] !== '')
{
$new_post_content1 = preg_replace("~<iframe(.*?)(?:width=[\"\'](?:\d*?)[\"\'])?(.*?)>~i", '<iframe$1 width="' . esc_attr($crawlomatic_Main_Settings['iframe_resize_width']) . '"$2>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
if (isset($crawlomatic_Main_Settings['iframe_resize_height']) && $crawlomatic_Main_Settings['iframe_resize_height'] !== '')
{
$new_post_content1 = preg_replace("~<iframe(.*?)(?:height=[\"\'](?:\d*?)[\"\'])?(.*?)>~i", '<iframe$1 height="' . esc_attr($crawlomatic_Main_Settings['iframe_resize_height']) . '"$2>', $new_post_content);
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
if($regex_image == '1')
{
if ($strip_by_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $strip_by_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_cont = preg_replace("~" . $sbr . "~i", $repreg, $get_img);
if($temp_cont !== NULL)
{
$get_img = $temp_cont;
}
}
}
}
if($featured_replacer != '' && !empty($get_img))
{
$xfeatured_replacer = preg_split('/
|
|
/', $featured_replacer);
foreach($xfeatured_replacer as $featured_img_repl)
{
$repl_parts = explode('=>', $featured_img_repl);
if(isset($repl_parts[1]))
{
if(trim($repl_parts[0]) == $get_img)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Replacing featured image "' . esc_html(trim($repl_parts[0])) . '" with new image: "' . esc_html(trim($repl_parts[1])) . '"');
}
$get_img = trim($repl_parts[1]);
}
}
}
}
$my_post['crawlomatic_post_image'] = $get_img;
if ($strip_by_regex_title !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $strip_by_regex_title);
$xreplace_regex = preg_split('/
|
|
/', $replace_regex_title);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_cont_title = preg_replace("~" . $sbr . "~i", $repreg, $new_post_title);
if($temp_cont_title !== NULL)
{
$new_post_title = $temp_cont_title;
}
}
}
$exc_cont = $content;
if ($strip_by_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $strip_by_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_contx = preg_replace("~" . $sbr . "~i", $repreg, $exc_cont);
if($temp_contx !== NULL)
{
$exc_cont = $temp_contx;
}
}
}
$new_post_content = str_replace('</ iframe>', '</iframe>', $new_post_content);
if ($keep_source == '1')
{
$new_post_content1 = preg_replace('{"https:\/\/translate.google.com\/translate\?hl=(?:.*?)&prev=_t&sl=(?:.*?)&tl=(?:.*?)&u=([^"]*?)"}i', "$1", urldecode($new_post_content));
if($new_post_content1 !== null)
{
$new_post_content = $new_post_content1;
}
}
if (isset($crawlomatic_Main_Settings['fix_html']) && $crawlomatic_Main_Settings['fix_html'] == "on")
{
$new_post_content = crawlomatic_repairHTML($new_post_content);
if (isset($crawlomatic_Main_Settings['alt_read']) && $crawlomatic_Main_Settings['alt_read'] == "on")
{
$new_post_content = str_replace('<html><body>', '', $new_post_content);
$new_post_content = str_replace('</body></html>', '', $new_post_content);
$new_post_content = str_replace('<a ', ' <a ', $new_post_content);
}
}
if (isset($crawlomatic_Main_Settings['strip_html']) && $crawlomatic_Main_Settings['strip_html'] == 'on') {
$new_post_content = crawlomatic_strip_html_tags_nl($new_post_content);
}
if($ret_content == 1)
{
return array($new_post_content, $new_post_title);
}
if (!isset($crawlomatic_Main_Settings['disable_excerpt']) || $crawlomatic_Main_Settings['disable_excerpt'] != "on")
{
if(!isset($items[$iloop]['excerpt']) || trim($items[$iloop]['excerpt']) == '')
{
if ($translate != "disabled" && $translate != "en") {
$my_post['post_excerpt'] = crawlomatic_getExcerpt($new_post_content);
} else {
$my_post['post_excerpt'] = crawlomatic_getExcerpt($exc_cont);
}
}
}
$my_post['post_content'] = trim($new_post_content);
$my_post['auto_delete'] = '';
if ($auto_delete !== "") {
$auto_delete = trim($auto_delete, ' "');
$del_time = strtotime($auto_delete);
if($del_time !== false)
{
$my_post['auto_delete'] = $del_time;
}
}
if (isset($crawlomatic_Main_Settings['no_dup_titles']) && $crawlomatic_Main_Settings['no_dup_titles'] == 'on')
{
$xposts = get_posts(
array(
'post_type' => $post_type,
'title' => html_entity_decode($new_post_title),
'post_status' => 'all',
'numberposts' => 1,
'update_post_term_cache' => false,
'update_post_meta_cache' => false,
'orderby' => 'post_date ID',
'order' => 'ASC',
)
);
if ( ! empty( $xposts ) ) {
$zap = $xposts[0];
} else {
$zap = null;
}
if($zap !== null)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . esc_html($new_post_title) . '", because it was found to be already posted (same title post exists)!');
}
continue;
}
}
$my_post['post_title'] = $new_post_title;
$my_post['original_title'] = $title;
$my_post['original_content'] = $content;
$my_post['crawlomatic_timestamp'] = crawlomatic_get_date_now();
$my_post['crawlomatic_post_format'] = $post_format;
if ($enable_pingback == '1') {
$my_post['ping_status'] = 'open';
} else {
$my_post['ping_status'] = 'closed';
}
$custom_arr = array();
if($custom_fields != '')
{
if(stristr($custom_fields, '=>') != false)
{
$rule_arr = explode(',', trim($custom_fields));
foreach($rule_arr as $rule)
{
$my_args = explode('=>', trim($rule));
if(isset($my_args[1]))
{
if(isset($my_args[2]))
{
$req_list = explode(',', $my_args[2]);
$required_found = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_content, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_content, trim($rl)) === false)
{
$required_found = true;
break;
}
}
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_title, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_title, trim($rl)) === false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if(isset($my_args[3]))
{
$my_args[1] = $my_args[3];
}
else
{
continue;
}
}
}
$custom_field_content = trim($my_args[1]);
$custom_field_content = crawlomatic_replaceContentShortcodes($custom_field_content, $new_post_title, $new_post_content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(stristr($my_args[0], '[') !== false && stristr($my_args[0], ']') !== false)
{
preg_match_all('#([^\[\]]*?)\[([^\[\]]*?)\]#', $my_args[0], $cfm);
if(isset($cfm[2][0]))
{
if(isset($custom_arr[trim($cfm[1][0])]) && is_array($custom_arr[trim($cfm[1][0])]))
{
$custom_arr[trim($cfm[1][0])] = array_merge($custom_arr[trim($cfm[1][0])], array(trim($cfm[2][0]) => $custom_field_content));
}
else
{
$custom_arr[trim($cfm[1][0])] = array(trim($cfm[2][0]) => $custom_field_content);
}
}
else
{
$custom_arr[trim($my_args[0])] = $custom_field_content;
}
}
else
{
$custom_arr[trim($my_args[0])] = $custom_field_content;
}
}
}
}
}
if($woo_active && ($post_type == 'product' || $post_type == 'product_variation'))
{
if(strstr($custom_fields, '_price') === false)
{
$custom_arr['_price'] = $item_price_multi;
}
if(strstr($custom_fields, '_sale_price') === false)
{
$custom_arr['_sale_price'] = $item_price_multi;
}
if(strstr($custom_fields, '_regular_price') === false)
{
if(!empty($item_regular_price_multi) && $item_regular_price_multi !== 0)
{
$custom_arr['_regular_price'] = $item_regular_price_multi;
}
else
{
$custom_arr['_regular_price'] = $item_price_multi;
}
}
if(strstr($custom_fields, '_visibility') === false)
{
$custom_arr['_visibility'] = 'visible';
}
if(strstr($custom_fields, '_manage_stock') === false)
{
$custom_arr['_manage_stock'] = 'no';
}
if(strstr($custom_fields, '_stock_status') === false)
{
$custom_arr['_stock_status'] = 'instock';
}
if(strstr($custom_fields, '_sku') === false)
{
$custom_arr['_sku'] = crawlomatic_generate_random_string(10);
}
}
$custom_tax_arr = array();
if($custom_tax != '')
{
if(stristr($custom_tax, '=>') != false)
{
$rule_arr = explode(';', trim($custom_tax));
foreach($rule_arr as $rule)
{
$my_args = explode('=>', trim($rule));
if(isset($my_args[1]))
{
if(isset($my_args[2]))
{
$req_list = explode(',', $my_args[2]);
$required_found = false;
foreach($req_list as $rl)
{
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_content, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_content, trim($rl)) === false)
{
$required_found = true;
break;
}
}
if(function_exists('mb_stristr'))
{
if(mb_stristr($new_post_title, trim($rl)) !== false)
{
$required_found = true;
break;
}
}
else
{
if(stristr($new_post_title, trim($rl)) === false)
{
$required_found = true;
break;
}
}
}
if($required_found === false)
{
if(isset($my_args[3]))
{
$my_args[1] = $my_args[3];
}
else
{
continue;
}
}
}
$custom_tax_content = trim($my_args[1]);
$custom_tax_content = crawlomatic_replaceContentShortcodes($custom_tax_content, $new_post_title, $new_post_content, $short_url, $extra_categories, $item_tags, $get_img, $description, $read_more, $postdate, $items[$iloop]['price'], $item_price_multi, $custom_shortcodes_arr, $img_attr, $screenimageURL, $append_urls, $item_download, $items[$iloop]['gallery'], $gallery_regex, $replace_gallery_regex, $items[$iloop]['regular_price'], $item_regular_price_multi);
if(substr(trim($my_args[0]), 0, 3) === "pa_" && $post_type == 'product' && !empty($custom_tax_content))
{
if(isset($custom_arr['_product_attributes']))
{
$custom_arr['_product_attributes'] = array_merge($custom_arr['_product_attributes'], array(trim($my_args[0]) =>array(
'name' => trim($my_args[0]),
'value' => $custom_tax_content,
'is_visible' => '1',
'is_taxonomy' => '1'
)));
}
else
{
$custom_arr['_product_attributes'] = array(trim($my_args[0]) =>array(
'name' => trim($my_args[0]),
'value' => $custom_tax_content,
'is_visible' => '1',
'is_taxonomy' => '1'
));
}
}
if(isset($custom_tax_arr[trim($my_args[0])]))
{
$custom_tax_arr[trim($my_args[0])] .= ',' . $custom_tax_content;
}
else
{
$custom_tax_arr[trim($my_args[0])] = $custom_tax_content;
}
}
}
}
}
if(count($custom_tax_arr) > 0)
{
$my_post['taxo_input'] = $custom_tax_arr;
}
$my_post['meta_input'] = $custom_arr;
if($parent_id != '')
{
$my_post['post_parent'] = intval($parent_id);
}
if($my_post['post_content'] === '' && $my_post['post_title'] === '')
{
continue;
}
$post_array[] = $items[$iloop]['title'];
if (isset($crawlomatic_Main_Settings['up_publish_date']) && $crawlomatic_Main_Settings['up_publish_date'] == 'on' && $my_post['update_meta_id'] != '' && is_numeric($my_post['update_meta_id']))
{
$my_post['post_date_gmt'] = gmdate("Y-m-d H:i:s", time());
$my_post['post_date'] = date("Y-m-d H:i:s", time());
}
if (isset($crawlomatic_Main_Settings['cleanup_not_printable']) && $crawlomatic_Main_Settings['cleanup_not_printable'] == 'on')
{
$pxca = preg_replace('/[-]/u', '', $my_post['post_content']);
if($pxca !== null)
{
$my_post['post_content'] = $pxca;
}
$pxta = preg_replace('/[-]/u', '', $my_post['post_title']);
if($pxta !== null)
{
$my_post['post_title'] = $pxta;
}
}
if(isset($items[$iloop]['variant_parent']))
{
$my_post['post_parent'] = $items[$iloop]['variant_parent'];
if($my_post['post_type'] == 'product')
{
$my_post['post_type'] = 'product_variation';
if($my_post['post_status'] != 'publish')
{
$my_post['post_status'] = 'publish';
}
}
}
if (isset($crawlomatic_Main_Settings['publish_delay']) && $crawlomatic_Main_Settings['publish_delay'] != '')
{
$intsleep = intval($crawlomatic_Main_Settings['publish_delay']);
if($intsleep > 0)
{
usleep($intsleep * 1000);
}
$my_post['post_date_gmt'] = gmdate("Y-m-d H:i:s", time());
$my_post['post_date'] = date("Y-m-d H:i:s", time());
}
if (!isset($crawlomatic_Main_Settings['keep_filters']) || $crawlomatic_Main_Settings['keep_filters'] != 'on')
{
remove_filter('content_save_pre', 'wp_filter_post_kses');
remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');remove_filter('title_save_pre', 'wp_filter_kses');
}
if($my_post['update_meta_id'] != '' && is_numeric($my_post['update_meta_id']))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Updating post: ' . $my_post['update_meta_id']);
}
$my_post['ID'] = $my_post['update_meta_id'];
unset($my_post['post_status']);
if(trim($my_post['post_content']) == '')
{
unset($my_post['post_content']);
}
$post_id = wp_update_post($my_post, true);
}
else
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Publishing post: "' . $my_post['post_title'] . '"');
}
$post_id = wp_insert_post($my_post, true);
}
if (!isset($crawlomatic_Main_Settings['keep_filters']) || $crawlomatic_Main_Settings['keep_filters'] != 'on')
{
add_filter('content_save_pre', 'wp_filter_post_kses');
add_filter('content_filtered_save_pre', 'wp_filter_post_kses');add_filter('title_save_pre', 'wp_filter_kses');
}
if (!is_wp_error($post_id)) {
if($post_id === 0)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('Post not inserted, returned error: ' . $my_post['post_title']);
}
continue;
}
if($wpml_lang != '' && function_exists('pll_set_post_language'))
{
pll_set_post_language($post_id, $wpml_lang);
}
if(!isset($items[$iloop]['variant_parent']))
{
$posts_inserted++;
}
else
{
if(class_exists('WC_Product_Variation') && $post_type == 'product')
{
$pvariation = new WC_Product_Variation( $post_id );
$pvariation->set_sale_price( $item_price_multi );
$pvariation->set_price( $item_price_multi );
if(!empty($item_regular_price_multi) && $item_regular_price_multi !== 0)
{
$pvariation->set_regular_price( $item_regular_price_multi );
}
else
{
$pvariation->set_regular_price( $item_price_multi );
}
$pvariation->set_manage_stock(false);
if(isset($items[$iloop]['extrainfo']['weight']))
{
$pvariation->set_weight($items[$iloop]['extrainfo']['weight']);
}
if(isset($items[$iloop]['extrainfo']['length']))
{
$pvariation->set_length($items[$iloop]['extrainfo']['length']);
}
if(isset($items[$iloop]['extrainfo']['width']))
{
$pvariation->set_width($items[$iloop]['extrainfo']['width']);
}
if(isset($items[$iloop]['extrainfo']['height']))
{
$pvariation->set_height($items[$iloop]['extrainfo']['height']);
}
if(isset($items[$iloop]['extrainfo']['is_downloadable']))
{
$pvariation->set_downloadable($items[$iloop]['extrainfo']['is_downloadable']);
}
if(isset($items[$iloop]['extrainfo']['is_in_stock']))
{
$pvariation->set_stock_status($items[$iloop]['extrainfo']['is_in_stock']);
}
if(isset($items[$iloop]['extrainfo']['is_virtual']))
{
$pvariation->set_virtual($items[$iloop]['extrainfo']['is_virtual']);
}
$pvariation->save();
}
}
$variant_names = '';
if(isset($items[$iloop]['variants']))
{
foreach($items[$iloop]['variants'] as $varpost)
{
if(isset($varpost['variant_name']))
{
$variant_names .= ' ' . trim($varpost['variant_name']) . ' |';
}
$varpost['variant_parent'] = $post_id;
$items[] = $varpost;
}
$variant_names = rtrim(trim($variant_names, '|'));
if($post_type == 'product')
{
wp_set_object_terms( $post_id, 'variable', 'product_type' );
$attributes_array = get_post_meta( $post_id, '_product_attributes', true);
if(!is_array($attributes_array))
{
$attributes_array = array();
}
if(!empty($variants_label))
{
$attr_label = $variants_label;
}
else
{
$attr_label = esc_html__('Variants', 'crawlomatic-multipage-scraper-post-generator');
}
$attr_slug = sanitize_title($attr_label);
$attributes_array[$attr_slug] = array(
'name' => $attr_label,
'value' => trim($variant_names),
'is_visible' => '1',
'is_variation' => '1',
'is_taxonomy' => '0'
);
update_post_meta( $post_id, '_product_attributes', $attributes_array );
}
}
if(isset($items[$iloop]['variant_name']))
{
if(!empty($variants_label))
{
$attr_label = $variants_label;
}
else
{
$attr_label = esc_html__('Variants', 'crawlomatic-multipage-scraper-post-generator');
}
$attr_slug = sanitize_title($attr_label);
update_post_meta( $post_id, 'attribute_' . $attr_slug, trim($items[$iloop]['variant_name']) );
}
if(count($my_post['download_local']) > 0)
{
foreach($my_post['download_local'] as $dlloc)
{
$wp_filetype = wp_check_filetype( $dlloc, null );
$attachment = array(
'post_mime_type' => $wp_filetype['type'],
'post_title' => 'Downloaded file for post ID ' . $post_id,
'post_content' => '',
'post_status' => 'inherit'
);
$screens_attach_id = wp_insert_attachment($attachment, $dlloc, $post_id);
require_once( ABSPATH . 'wp-admin/includes/image.php' );
require_once( ABSPATH . 'wp-admin/includes/media.php' );
$attach_data = wp_generate_attachment_metadata($screens_attach_id, $dlloc);
wp_update_attachment_metadata( $screens_attach_id, $attach_data );
if($post_type == 'product' && class_exists('WC_Product_Download'))
{
$file_url = wp_get_attachment_url( $screens_attach_id );
$download_id = md5( $file_url );
$file_name = $my_post['post_title'];
$pd_object = new WC_Product_Download();
$pd_object->set_id( $download_id );
$pd_object->set_name( $file_name );
$pd_object->set_file( $file_url );
$product = wc_get_product( $post_id );
if($product !== null)
{
$downloads = $product->get_downloads();
$downloads[$download_id] = $pd_object;
$product->set_downloads($downloads);
$product->save();
}
}
}
}
if($remove_default == '1' && (($auto_categories == '1' && $my_post['extra_categories'] != '') || (isset($default_category) && $default_category !== 'crawlomatic_no_category_12345678' && $default_category[0] !== 'crawlomatic_no_category_12345678')))
{
$default_categories = wp_get_post_categories($post_id);
}
if(isset($my_post['taxo_input']))
{
foreach($my_post['taxo_input'] as $taxn => $taxval)
{
$taxn = trim($taxn);
$taxval = trim($taxval);
if(is_taxonomy_hierarchical($taxn))
{
$taxval = array_map('trim', explode(',', $taxval));
for($ii = 0; $ii < count($taxval); $ii++)
{
if(!is_numeric($taxval[$ii]))
{
$xtermid = get_term_by('name', $taxval[$ii], $taxn);
if($xtermid !== false)
{
$taxval[$ii] = intval($xtermid->term_id);
}
else
{
wp_insert_term( $taxval[$ii], $taxn);
$xtermid = get_term_by('name', $taxval[$ii], $taxn);
if($xtermid !== false)
{
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
pll_set_term_language($xtermid->term_id, $wpml_lang);
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', $taxn );
$pars['element_id'] = $xtermid->term_id;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
$taxval[$ii] = intval($xtermid->term_id);
}
}
}
}
wp_set_post_terms($post_id, $taxval, $taxn, true);
}
else
{
wp_set_post_terms($post_id, trim($taxval), $taxn, true);
}
}
}
if (isset($my_post['crawlomatic_post_format']) && $my_post['crawlomatic_post_format'] != '' && $my_post['crawlomatic_post_format'] != 'post-format-standard') {
wp_set_post_terms($post_id, $my_post['crawlomatic_post_format'], 'post_format', true);
}
if($my_post['screen_attach'] != '')
{
$media_post = wp_update_post( array(
'ID' => $my_post['screen_attach'],
'post_parent' => $post_id,
), true );
if( is_wp_error( $media_post ) ) {
crawlomatic_log_to_file( 'Failed to assign post attachment ' . $my_post['screen_attach'] . ' to post id ' . $post_id . ': ' . print_r( $media_post, 1 ) );
}
}
$featured_path = '';
$image_failed = false;
if (!isset($crawlomatic_Main_Settings['no_up_img']) || $crawlomatic_Main_Settings['no_up_img'] != 'on' || $my_post['update_meta_id'] == '' || !is_numeric($my_post['update_meta_id']))
{
if(isset($my_post['post_gallery']) && !empty($my_post['post_gallery']))
{
if (($key = array_search($my_post['crawlomatic_post_image'], $my_post['post_gallery'])) !== false) {
unset($my_post['post_gallery'][$key]);
$my_post['post_gallery'] = array_values($my_post['post_gallery']);
}
$xcounter = 1;
$attach_ids = array();
for($cntj = 0; $cntj < count($my_post['post_gallery']); $cntj++)
{
$my_post['post_gallery'][$cntj] = htmlspecialchars_decode($my_post['post_gallery'][$cntj]);
if ($gallery_regex !== '')
{
$xstrip_by_regex = preg_split('/
|
|
/', $gallery_regex);
$xreplace_regex = preg_split('/
|
|
/', $replace_gallery_regex);
$xcnt = 0;
foreach($xstrip_by_regex as $sbr)
{
if(isset($xreplace_regex[$xcnt]))
{
$repreg = $xreplace_regex[$xcnt];
}
else
{
$repreg = '';
}
$xcnt++;
$temp_cont_gallery = preg_replace("~" . $sbr . "~i", $repreg, $my_post['post_gallery'][$cntj]);
if($temp_cont_gallery !== NULL)
{
$my_post['post_gallery'][$cntj] = $temp_cont_gallery;
}
}
}
}
$my_post['post_gallery'] = array_unique($my_post['post_gallery']);
foreach($my_post['post_gallery'] as $gimg)
{
$gimg = trim($gimg);
if(empty($gimg))
{
continue;
}
$uploaded_gallery = crawlomatic_upload_attachment_media($gimg, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass, $xcounter);
if($uploaded_gallery === false)
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('crawlomatic_upload_attachment_media failed for ' . $gimg . '!');
}
}
else
{
$attach_ids[] = $uploaded_gallery;
}
$xcounter++;
}
if($post_type == 'product' && !empty($attach_ids))
{
update_post_meta($post_id, '_product_image_gallery', implode(',', $attach_ids));
}
}
}
if ($my_post['update_meta_id'] == '' || !is_numeric($my_post['update_meta_id']) || !isset($crawlomatic_Main_Settings['no_up_img']) || $crawlomatic_Main_Settings['no_up_img'] != 'on')
{
if ($featured_image == '1') {
$get_img = $my_post['crawlomatic_post_image'];
if ($get_img != '') {
if (!crawlomatic_generate_featured_image($get_img, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
{
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed for ' . $get_img . '!');
}
} else {
$featured_path = $get_img;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) )
{
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
} else {
$image_failed = true;
}
}
if ($image_failed || $featured_image !== '1') {
if ($image_url != '')
{
$replacement = str_replace(array('[', ']'), '', $my_post['post_title']);
$image_url_temp = str_replace('%%item_title%%', $replacement, $image_url);
$image_url_temp = preg_replace_callback('#%%random_image\[([^\]]*?)\]%%#', function ($matches) {
$my_img = crawlomatic_get_random_image_google($matches[1]);
return $my_img;
}, $image_url_temp);
$image_urlx = explode(',', $image_url_temp);
$image_urlx = trim($image_urlx[array_rand($image_urlx)]);
$retim = false;
if(is_numeric($image_urlx) && $image_urlx > 0)
{
require_once(ABSPATH . 'wp-admin/includes/image.php');
require_once(ABSPATH . 'wp-admin/includes/media.php');
$res2 = set_post_thumbnail($post_id, $image_urlx);
if ($res2 === FALSE) {
}
else
{
$retim = true;
}
}
if($retim == false && $image_urlx != '')
{
if (isset($crawlomatic_Main_Settings['crawlomatic_featured_image_checking']) && $crawlomatic_Main_Settings['crawlomatic_featured_image_checking'] == 'on') {
stream_context_set_default( [
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
],
]);
error_reporting(0);
$url_headers = get_headers($image_urlx, 1);
error_reporting(E_ALL);
if (isset($url_headers['Content-Type'])) {
if (is_array($url_headers['Content-Type'])) {
$img_type = strtolower($url_headers['Content-Type'][0]);
} else {
$img_type = strtolower($url_headers['Content-Type']);
}
if (strstr($img_type, 'image/') !== false) {
if (!crawlomatic_generate_featured_image($image_urlx, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed to default value: ' . $image_urlx . '!');
}
} else {
$featured_path = $image_urlx;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) ) {
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
}
}
}
else
{
if (!crawlomatic_generate_featured_image($image_urlx, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)) {
$image_failed = true;
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('crawlomatic_generate_featured_image failed to default value: ' . $image_urlx . '!');
}
} else {
$featured_path = $image_urlx;
if ( ! add_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path, true ) ) {
update_post_meta( $post_id, 'crawlomatic_featured_img', $featured_path );
}
}
}
}
}
}
if($featured_image == '1' && $featured_path == '' && ($skip_no_image == '1' || (isset($crawlomatic_Main_Settings['skip_no_img']) && $crawlomatic_Main_Settings['skip_no_img'] == 'on')))
{
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Skipping post "' . $my_post['post_title'] . '", because it failed to generate a featured image for: ' . $get_img . ' and ' . $image_url);
}
wp_delete_post($post_id, true);
if(!isset($items[$iloop]['variant_parent']))
{
$posts_inserted--;
}
continue;
}
}
if ($can_create_tag == '1') {
if(strstr($custom_tax, 'product_tag') === false)
{
if ($my_post['tags_input'] != '')
{
if($post_type == 'product')
{
wp_set_post_terms($post_id, $my_post['tags_input'], 'product_tag', true);
}
}
}
}
if ($auto_categories == '1') {
if(strstr($custom_tax, 'product_cat') === false)
{
if ($my_post['extra_categories'] != '') {
if($post_type == 'product')
{
if($parent_category_id != '')
{
$termid = crawlomatic_create_terms('product_cat', $parent_category_id, $my_post['extra_categories'], $remove_cats);
}
else
{
$termid = crawlomatic_create_terms('product_cat', null, $my_post['extra_categories'], $remove_cats);
}
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($termid as $tx)
{
pll_set_term_language($tx, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', 'product_cat' );
foreach($termid as $tx)
{
$pars['element_id'] = $tx;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
}
}
else
{
if($parent_category_id != '')
{
$termid = crawlomatic_create_terms('category', $parent_category_id, $my_post['extra_categories'], $remove_cats);
}
else
{
$termid = crawlomatic_create_terms('category', null, $my_post['extra_categories'], $remove_cats);
}
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($termid as $tx)
{
pll_set_term_language($tx, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_object_id'))
{
$wpml_element_type = apply_filters( 'wpml_element_type', 'category' );
foreach($termid as $tx)
{
$pars['element_id'] = $tx;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['trid'] = FALSE;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
}
}
}
if($post_type == 'product')
{
wp_set_post_terms($post_id, $termid, 'product_cat', true);
}
else
{
wp_set_post_terms($post_id, $termid, 'category', true);
}
}
}
}
if (isset($default_category) && $default_category !== 'crawlomatic_no_category_12345678' && $default_category[0] !== 'crawlomatic_no_category_12345678') {
if(is_array($default_category))
{
$cats = array();
$wcats = array();
foreach($default_category as $dc)
{
if(substr($dc, 0, 1) === 'w')
{
$wcats[] = ltrim($dc, 'w');
}
else
{
$cats[] = $dc;
}
}
if($post_type == 'product')
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_terms($post_id, $wcats, 'product_cat', true);
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
else
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_categories($post_id, $cats, true);
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($cats as $cc)
{
pll_set_term_language($cc, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
}
else
{
$cats = array();
$wcats = array();
if(substr($default_category, 0, 1) === 'w')
{
$wcats[] = ltrim($default_category, 'w');
}
else
{
$cats[] = $default_category;
}
if($post_type == 'product')
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_terms($post_id, $wcats, 'product_cat', true);
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
else
{
global $sitepress;
if($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$current_language = apply_filters( 'wpml_current_language', NULL );
$sitepress->switch_lang($wpml_lang);
}
wp_set_post_categories($post_id, $cats, true);
if($wpml_lang != '' && function_exists('pll_set_term_language'))
{
foreach($cats as $cc)
{
pll_set_term_language($cc, $wpml_lang);
}
}
elseif($wpml_lang != '' && has_filter('wpml_current_language') && $sitepress != null)
{
$sitepress->switch_lang($current_language);
}
}
}
}
if($remove_default == '1' && (($auto_categories == '1' && $my_post['extra_categories'] != '') || (isset($default_category) && $default_category !== 'crawlomatic_no_category_12345678' && $default_category[0] !== 'crawlomatic_no_category_12345678')))
{
$new_categories = wp_get_post_categories($post_id);
if(isset($default_categories) && !($default_categories == $new_categories))
{
foreach($default_categories as $dc)
{
$rem_cat = get_category( $dc );
wp_remove_object_terms( $post_id, $rem_cat->slug, 'category' );
}
}
}
if (isset($crawlomatic_Main_Settings['post_source_custom']) && $crawlomatic_Main_Settings['post_source_custom'] != '') {
$tax_rez = wp_set_object_terms( $post_id, $crawlomatic_Main_Settings['post_source_custom'], 'coderevolution_post_source', true);
}
else
{
$tax_rez = wp_set_object_terms( $post_id, 'Crawlomatic_' . $param, 'coderevolution_post_source', true);
}
if (is_wp_error($tax_rez)) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('wp_set_object_terms failed for: ' . $post_id . '!');
}
}
if($post_type == 'topic' && $parent_id != '')
{
update_post_meta($post_id, '_bbp_forum_id', $parent_id);
update_post_meta($post_id, '_bbp_topic_id', $post_id);
update_post_meta($post_id, '_bbp_voice_count', '0');
update_post_meta($post_id, '_bbp_reply_count', '0');
update_post_meta($post_id, '_bbp_reply_count_hidden', '0');
update_post_meta($post_id, '_bbp_last_reply_id', '0');
update_post_meta($post_id, '_bbp_last_active_id', $post_id);
update_post_meta($post_id, '_bbp_last_active_time', get_post_field( 'post_date', $post_id, 'db' ));
do_action( 'bbp_insert_topic', (int) $post_id, (int) $parent_id );
}
if($post_type == 'reply' && $parent_id != '')
{
if(function_exists('bbp_get_topic_forum_id'))
{
$forum_aidi = bbp_get_topic_forum_id($parent_id);
if(empty($forum_aidi))
{
$forum_aidi = 0;
}
}
else
{
$forum_aidi = 0;
}
do_action( 'bbp_insert_reply', (int) $post_id, (int) $parent_id, (int) $forum_aidi );
}
if (isset($crawlomatic_Main_Settings['link_source']) && $crawlomatic_Main_Settings['link_source'] == 'on') {
$title_link_url = '1';
}
else
{
$title_link_url = '0';
}
if($featured_path == '')
{
$featured_path = $my_post['crawlomatic_post_image'];
}
if(!empty($my_post['attach_ids']))
{
foreach($my_post['attach_ids'] as $zatt_id)
{
crawlomatic_add_attachment_to_post($post_id, $zatt_id);
}
}
crawlomatic_addPostMeta($post_id, $my_post, $param, $featured_path, $title_link_url, $css_cont, $rule_unique_id, $crawlomatic_Main_Settings);
if($wpml_lang != '' && (class_exists('SitePress') || function_exists('wpml_object_id')))
{
$wpml_element_type = apply_filters( 'wpml_element_type', $post_type );
$pars['element_id'] = $post_id;
$pars['element_type'] = $wpml_element_type;
$pars['language_code'] = $wpml_lang;
$pars['source_language_code'] = NULL;
do_action('wpml_set_element_language_details', $pars);
global $wp_filesystem;
if ( ! is_a( $wp_filesystem, 'WP_Filesystem_Base') ){
include_once(ABSPATH . 'wp-admin/includes/file.php');$creds = request_filesystem_credentials( site_url() );
wp_filesystem($creds);
}
if($wp_filesystem->exists(WP_PLUGIN_DIR . '/sitepress-multilingual-cms/inc/wpml-api.php'))
{
include_once( WP_PLUGIN_DIR . '/sitepress-multilingual-cms/inc/wpml-api.php' );
}
$wpml_lang = trim($wpml_lang);
if(function_exists('wpml_update_translatable_content'))
{
wpml_update_translatable_content('post_' . $post_type, $post_id, $wpml_lang);
if($my_post['crawlomatic_post_orig_url'] != '')
{
global $sitepress;
global $wpdb;
$keyid = md5($my_post['crawlomatic_post_orig_url']);
$keyName = $keyid . '_wpml';
$rezxxxa = $wpdb->get_results( "SELECT * FROM {$wpdb->prefix}postmeta WHERE `meta_key` = '$keyName' limit 1", ARRAY_A );
if(count($rezxxxa) != 0)
{
$metaRow = $rezxxxa[0];
$metaValue = $metaRow['meta_value'];
$metaParts = explode('_', $metaValue);
$sitepress->set_element_language_details($post_id, 'post_'.$my_post['post_type'] , $metaParts[0], $wpml_lang, $metaParts[1] );
}
else
{
$ptrid = $sitepress->get_element_trid($post_id);
update_post_meta($post_id, $keyid.'_wpml', $ptrid.'_'.$wpml_lang );
}
}
}
}
if(isset($items[$iloop]['variant_parent']))
{
if(class_exists('WC_Product_Variable'))
{
WC_Product_Variable::sync( $items[$iloop]['variant_parent'] );
}
}
if (isset($crawlomatic_Main_Settings['draft_first']) && $crawlomatic_Main_Settings['draft_first'] == 'on' && $draft_me == true)
{
crawlomatic_change_post_status($post_id, 'publish');
}
if (isset($crawlomatic_Main_Settings['send_post_email']) && $crawlomatic_Main_Settings['send_post_email'] == 'on')
{
$to = $crawlomatic_Main_Settings['email_address'];
if (!filter_var($to, FILTER_VALIDATE_EMAIL) === false)
{
$subject = get_the_title($post_id);
$content_post = get_post($post_id);
if($content_post !== null)
{
$message = $content_post->post_content;
$message = str_replace(']]>', ']]>', $message);
$headers[] = 'From: Crawlomatic Plugin <[email protected]>';
$headers[] = 'Reply-To: [email protected]';
$headers[] = 'X-Mailer: PHP/' . phpversion();
$headers[] = 'Content-Type: text/html';
$headers[] = 'Charset: ' . get_option('blog_charset', 'UTF-8');
wp_mail($to, $subject, $message, $headers);
}
}
}
} else {
crawlomatic_log_to_file('Failed to insert post into database! Title:' . $my_post['post_title'] . '! Error: ' . $post_id->get_error_message() . 'Error code: ' . $post_id->get_error_code() . 'Error data: ' . $post_id->get_error_data());
continue;
}
if(!isset($items[$iloop]['variant_parent']))
{
$count++;
}
}
unset($posted_items);
}
catch (Exception $e) {
if($continue_search == '1' && isset($skip_posts_temp[$param]))
{
unset($skip_posts_temp[$param]);
update_option('crawlomatic_continue_search', $skip_posts_temp);
}
crawlomatic_log_to_file('Exception thrown ' . esc_html($e->getMessage()) . '!');
if($auto == 1)
{
crawlomatic_clearFromList($param);
}
return 'fail';
}
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Rule ID ' . esc_html($param) . ' for ' . $ids . ' successfully run! ' . esc_html($posts_inserted) . ' posts created!');
}
if (isset($crawlomatic_Main_Settings['send_email']) && $crawlomatic_Main_Settings['send_email'] == 'on' && $crawlomatic_Main_Settings['email_address'] !== '') {
if (isset($crawlomatic_Main_Settings['email_summary']) && $crawlomatic_Main_Settings['email_summary'] == 'on')
{
$last_sent = get_option('crawlomatic_last_sent_email', false);
if($last_sent == false)
{
$last_sent = date("d.m.y");
update_option('crawlomatic_last_sent_email', $last_sent);
}
$email_content = get_option('crawlomatic_email_content', '');
$email_content .= '<br/>Rule ID ' . esc_html($param) . ' for ' . $ids . ' successfully run! ' . esc_html($posts_inserted) . ' posts created!';
if($last_sent != date("d.m.y"))
{
update_option('crawlomatic_last_sent_email', date("d.m.y"));
update_option('crawlomatic_email_content', '');
try {
$to = $crawlomatic_Main_Settings['email_address'];
if (!filter_var($to, FILTER_VALIDATE_EMAIL) === false)
{
$subject = '[Crawlomatic] Rule running report - ' . crawlomatic_get_date_now();
$message = 'Rule ID ' . esc_html($param) . ' for ' . $ids . ' successfully run! ' . esc_html($posts_inserted) . ' posts created!';
$headers[] = 'From: Crawlomatic Plugin <[email protected]>';
$headers[] = 'Reply-To: [email protected]';
$headers[] = 'X-Mailer: PHP/' . phpversion();
$headers[] = 'Content-Type: text/html';
$headers[] = 'Charset: ' . get_option('blog_charset', 'UTF-8');
wp_mail($to, $subject, $message, $headers);
}
}
catch (Exception $e) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to send mail: Exception thrown ' . esc_html($e->getMessage()) . '!');
}
}
}
else
{
update_option('crawlomatic_email_content', $email_content);
}
}
else
{
$getdatex = get_option('crawlomatic_last_sent_email', false);
if($getdatex != false)
{
update_option('crawlomatic_last_sent_email', false);
}
$getdatex = get_option('crawlomatic_email_content', false);
if($getdatex != false)
{
update_option('crawlomatic_email_content', false);
}
try {
$to = $crawlomatic_Main_Settings['email_address'];
if (!filter_var($to, FILTER_VALIDATE_EMAIL) === false)
{
$subject = '[Crawlomatic] Rule running report - ' . crawlomatic_get_date_now();
$message = 'Rule ID ' . esc_html($param) . ' for ' . $ids . ' successfully run! ' . esc_html($posts_inserted) . ' posts created!';
$headers[] = 'From: Crawlomatic Plugin <[email protected]>';
$headers[] = 'Reply-To: [email protected]';
$headers[] = 'X-Mailer: PHP/' . phpversion();
$headers[] = 'Content-Type: text/html';
$headers[] = 'Charset: ' . get_option('blog_charset', 'UTF-8');
wp_mail($to, $subject, $message, $headers);
}
}
catch (Exception $e) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to send mail: Exception thrown ' . esc_html($e->getMessage()) . '!');
}
}
}
}
}
if ($posts_inserted == 0) {
if($auto == 1)
{
crawlomatic_clearFromList($param);
}
return 'nochange';
} else {
if($auto == 1)
{
crawlomatic_clearFromList($param);
}
return 'ok';
}
}
/**
 * Change the status of an existing post while the KSES save filters are
 * temporarily detached, so the stored title/content are not re-sanitized
 * by the status change.
 *
 * The three filters are re-added unconditionally after the update.
 *
 * @param int    $post_id ID of the post to update.
 * @param string $status  New post status (e.g. 'publish').
 * @return void
 */
function crawlomatic_change_post_status($post_id, $status){
    $current_post = get_post( $post_id, 'ARRAY_A' );
    if (!is_array($current_post))
    {
        // get_post() yields null when the post does not exist. Writing a key
        // into null would silently build an array without an ID and pass it
        // to wp_update_post(), so bail out instead.
        return;
    }
    $current_post['post_status'] = $status;
    remove_filter('content_save_pre', 'wp_filter_post_kses');
    remove_filter('content_filtered_save_pre', 'wp_filter_post_kses');
    remove_filter('title_save_pre', 'wp_filter_kses');
    wp_update_post($current_post);
    add_filter('content_save_pre', 'wp_filter_post_kses');
    add_filter('content_filtered_save_pre', 'wp_filter_post_kses');
    add_filter('title_save_pre', 'wp_filter_kses');
}
/**
 * Case-insensitive wildcard match: every '*' in $pattern matches any run of
 * characters, everything else is matched literally.
 *
 * The match is not anchored, so the pattern may match anywhere inside $source.
 *
 * @param string $source  Text to test.
 * @param string $pattern Wildcard pattern ('*' = any characters).
 * @return int|false 1 on match, 0 on no match, false if the regex fails.
 */
function crawlomatic_stringMatchWithWildcard($source, $pattern) {
    // Quote for the '~' delimiter that is actually used below; quoting for '/'
    // left a literal '~' unescaped and broke the expression.
    $pattern = preg_quote($pattern, '~');
    // preg_quote() turned each '*' into '\*'; convert those back into wildcards.
    $pattern = str_replace('\*', '.*', $pattern);
    return preg_match('~' . $pattern . '~i', $source);
}
/**
 * Run every opening <a ...> tag in $content through
 * crawlomatic_link_callback() and return the rewritten markup.
 *
 * @param string $content HTML to process.
 * @return string Processed HTML, or the unmodified input if the regex engine fails.
 */
function crawlomatic_add_link_tags($content) {
    $rewritten = preg_replace_callback('~<(a\s[^>]+)>~isU', "crawlomatic_link_callback", $content);
    if ($rewritten === null)
    {
        // preg_replace_callback() returns null on a PCRE error; keep the
        // original content rather than wiping it out.
        return $content;
    }
    return $rewritten;
}
/**
 * Build a random alphanumeric string of exactly $length characters.
 *
 * The alphabet (0-9, a-z, A-Z) is repeated often enough to cover $length,
 * shuffled with str_shuffle(), and cut to size. str_shuffle() is not
 * cryptographically secure, so do not use the result as a secret.
 *
 * @param int $length Desired length; values <= 0 yield an empty string.
 * @return string Random string of $length characters.
 */
function crawlomatic_generate_random_string( $length = 16 ) {
    $length = (int) $length;
    if ($length <= 0)
    {
        return '';
    }
    $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $pool = str_repeat($alphabet, (int) ceil($length / strlen($alphabet)));
    // Slice from offset 0: starting at 1 dropped a character and returned a
    // short string whenever $length was a multiple of the alphabet size.
    return substr(str_shuffle($pool), 0, $length);
}
/**
 * preg_replace_callback() handler that appends the configured attributes to
 * an opening anchor tag.
 *
 * A tag whose text contains the site host (home URL without scheme and
 * 'www.') is treated as internal and receives the 'link_attributes_internal'
 * setting; every other tag receives 'link_attributes_external'. Attributes of
 * the same name that already exist on the tag are removed first so they are
 * not duplicated.
 *
 * @param array $match [0] the full '<a ...>' tag, [1] the tag without angle brackets.
 * @return string Rewritten tag, or the original tag when no attributes are configured.
 */
function crawlomatic_link_callback($match)
{
    list($original, $tag) = $match;
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $blog_url = str_replace(array('http://', 'https://', 'www.'), '', get_home_url());

    // An empty needle would make every link look internal (PHP 8) or raise a warning (PHP 7).
    $is_internal = ($blog_url !== '' && stripos($tag, $blog_url) !== false);
    $setting_key = $is_internal ? 'link_attributes_internal' : 'link_attributes_external';

    if (!isset($crawlomatic_Main_Settings[$setting_key]) || $crawlomatic_Main_Settings[$setting_key] == '') {
        return $original;
    }
    $attributes = $crawlomatic_Main_Settings[$setting_key];

    // preg_match_all() is required here: the internal branch used preg_match(),
    // which yields a single string and made the loop below a no-op.
    if (preg_match_all('#([a-zA-Z]+)=#i', $attributes, $matches)) {
        foreach ($matches[1] as $attribute_name) {
            $cleaned = preg_replace('#' . preg_quote($attribute_name, '#') . '=[\'"][^\'"]*?[\'"]#', '', $tag);
            if ($cleaned !== null) {
                $tag = $cleaned;
            }
        }
    }
    return "<$tag " . $attributes . ">";
}
// Set to true once a fatal error has been reported, so it is handled only once.
$crawlomatic_fatal = false;

/**
 * Shutdown handler that releases the "running" marker of a rule.
 *
 * When the last PHP error is an E_ERROR that has not been reported yet, both
 * running lists are emptied and the error is written to the log. In every
 * case the rule is then removed from the manual or the automatic list.
 *
 * @param mixed $param Rule ID, as passed on to the list helpers and the log line.
 * @param mixed $auto  Strictly false selects the manual list; anything else the automatic list.
 * @return void
 */
function crawlomatic_clear_flag_at_shutdown($param, $auto = false)
{
    $last_error = error_get_last();
    $unreported_fatal = isset($last_error['type'])
        && $last_error['type'] === E_ERROR
        && $GLOBALS['crawlomatic_fatal'] === false;

    if ($unreported_fatal) {
        $GLOBALS['crawlomatic_fatal'] = true;
        $nothing_running = array();
        update_option('crawlomatic_running_list', $nothing_running);
        update_option('crawlomatic_auto_running_list', $nothing_running);
        crawlomatic_log_to_file('[FATAL] Exit error: ' . $last_error['message'] . ', file: ' . $last_error['file'] . ', line: ' . $last_error['line'] . ' - rule ID: ' . $param . '!');
    }

    if ($auto === false) {
        crawlomatic_clearFromList($param);
        return;
    }
    crawlomatic_clearFromAutoList($param);
}
/**
 * Remove every <img ...> tag from the content.
 *
 * @param string $content HTML to clean.
 * @return string Content without image tags; the input unchanged if the regex fails.
 */
function crawlomatic_strip_images($content)
{
    $without_images = preg_replace("/<img[^>]+\>/i", "", $content);
    return ($without_images === null) ? $content : $without_images;
}
/**
 * Reduce a URL to its "scheme://host" part.
 *
 * Port, path, query and fragment are dropped.
 *
 * @param string $url URL to reduce.
 * @return string "scheme://host", or $url unchanged when it cannot be parsed
 *                or lacks a scheme or a host (e.g. a relative path).
 */
function crawlomatic_get_url_domain($url) {
    $parts = parse_url($url);
    // parse_url() only returns the components that are present; without this
    // check a relative URL produced "://" together with undefined-key warnings.
    if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
        return $url;
    }
    return $parts['scheme'] . "://" . $parts['host'];
}
/**
 * Remove anchor tags from the content while keeping the link text.
 *
 * @param string $content HTML to clean.
 * @return string Content without <a ...> and </a> tags; a step whose regex
 *                fails leaves the content as it was.
 */
function crawlomatic_strip_links($content)
{
    $anchor_patterns = array(
        // '<a' must be followed by whitespace or '>' so that <abbr>, <article>,
        // <audio>, ... are left alone; tag names are matched case-insensitively.
        '~<a(?:\s[^>]*)?>~i',
        '~</a\s*>~i',
    );
    foreach ($anchor_patterns as $anchor_pattern) {
        $stripped = preg_replace($anchor_pattern, "", $content);
        if ($stripped !== null) {
            $content = $stripped;
        }
    }
    return $content;
}
/**
 * Unwrap links that point back to the site a page was taken from.
 *
 * Two kinds of anchors are replaced by their link text:
 *  - absolute links to the host of $url or to a subdomain of it (a host with
 *    more than two labels is reduced to its last two labels first);
 *  - links whose href starts with '/'.
 *
 * @param string $content HTML to clean.
 * @param string $url     URL of the page the content originates from.
 * @return string Content with the matching anchors replaced by their text.
 */
function crawlomatic_strip_internal_links($content, $url)
{
    $parse = parse_url($url);
    if (isset($parse['host']) && $parse['host'] != '') {
        $host = $parse['host'];
        $labels = explode(".", $host);
        $label_count = count($labels);
        if ($label_count > 2) {
            // NOTE(review): for hosts such as "www.example.co.uk" this keeps
            // "co.uk" only - confirm whether that is acceptable.
            $host = $labels[$label_count - 2] . "." . $labels[$label_count - 1];
        }
        // '$1' is the captured link text. The double-quoted "\1" used before
        // is the octal escape for byte 0x01, not a backreference.
        $unwrapped = preg_replace('#<a(?:[^>]*)href=[\'"]http.?:\/\/(?:[^.]*?\.)?' . preg_quote($host, '#') . '[^\'"]*?[\'"](?:[^>]*)>(.*?)<\/a>#', '$1', $content);
        if ($unwrapped !== null) {
            $content = $unwrapped;
        }
    }
    $unwrapped = preg_replace('#<a(?:[^>]*)href=[\'"]\/[^\'"]*?[\'"](?:[^>]*)>(.*?)<\/a>#', '$1', $content);
    if ($unwrapped !== null) {
        $content = $unwrapped;
    }
    return $content;
}
add_filter('the_title', 'crawlomatic_add_affiliate_title_keyword');
/**
 * 'the_title' filter: apply the keyword replacement rules to a post title.
 *
 * Rules whose scope (third rule field) is 'content' are skipped.
 *
 * @param string $content Title text.
 * @return string Title with the keyword rules applied.
 */
function crawlomatic_add_affiliate_title_keyword($content)
{
    return crawlomatic_apply_affiliate_keyword_rules($content, 'content');
}
add_filter('the_content', 'crawlomatic_add_affiliate_content_keyword');
add_filter('the_excerpt', 'crawlomatic_add_affiliate_content_keyword');
/**
 * 'the_content' / 'the_excerpt' filter: apply the keyword replacement rules
 * to post content.
 *
 * Rules whose scope (third rule field) is 'title' are skipped.
 *
 * @param string $content Post content or excerpt.
 * @return string Content with the keyword rules applied.
 */
function crawlomatic_add_affiliate_content_keyword($content)
{
    return crawlomatic_apply_affiliate_keyword_rules($content, 'title');
}
/**
 * Shared implementation of the two keyword filters.
 *
 * Rules are read from the 'crawlomatic_keyword_list' option as
 * keyword => array(0 => link URL, 1 => replacement text, 2 => scope).
 * A keyword found outside of HTML tags and outside of existing anchors is
 * replaced by the replacement text (the keyword itself when none is given),
 * wrapped in a link when a URL is configured.
 *
 * @param string $content       Text to process.
 * @param string $skipped_scope Rules with this scope value are ignored.
 * @return string Processed text.
 */
function crawlomatic_apply_affiliate_keyword_rules($content, $skipped_scope)
{
    $rules = get_option('crawlomatic_keyword_list');
    if (!is_array($rules) || empty($rules)) {
        return $content;
    }
    foreach ($rules as $request => $value) {
        if (isset($value[2]) && $value[2] == $skipped_scope) {
            continue;
        }
        // Array keys that look like integers arrive as int; normalise to string.
        $keyword = stripslashes((string) $request);
        if (is_array($value) && isset($value[1]) && $value[1] != '') {
            $repl = stripslashes($value[1]);
        } else {
            $repl = $keyword;
        }
        if (isset($value[0]) && $value[0] != '') {
            $replacement = '<a href="' . stripslashes($value[0]) . '" target="_blank">' . esc_html($repl) . '</a>';
        } else {
            $replacement = esc_html($repl);
        }
        // The expression is delimited by single quotes, so the keyword has to
        // be quoted for that delimiter; otherwise a keyword containing an
        // apostrophe invalidates the pattern and the rule is silently dropped.
        $pattern = '\'(?!((<.*?)|(<a.*?)))(' . preg_quote($keyword, '\'') . ')(?!(([^<>]*?)>)|([^>]*?<\/a>))\'i';
        // A callback inserts the replacement literally, so '$1' or '\1' inside
        // a configured URL or text is not interpreted as a backreference.
        $replaced = preg_replace_callback($pattern, static function () use ($replacement) {
            return $replacement;
        }, $content);
        if ($replaced !== null) {
            $content = $replaced;
        }
    }
    return $content;
}
/**
 * Render the plugin's meta box for a post.
 *
 * Prints a table with the plugin meta values stored on the post, or a short
 * notice when the post has no 'crawlomatic_parent_rule' meta value.
 *
 * @param object $post Post being edited; only its ID property is read.
 * @return void Output is echoed.
 */
function crawlomatic_meta_box_function($post)
{
    wp_register_style('crawlomatic-browser-style', plugins_url('styles/crawlomatic-browser.css', __FILE__), false, '1.0.0');
    wp_enqueue_style('crawlomatic-browser-style');

    // Read all meta values while additions to the object cache are suspended.
    wp_suspend_cache_addition(true);
    $meta = array();
    $meta_keys = array(
        'crawlomatic_parent_rule',
        'crawlomatic_item_title',
        'crawlomatic_extra_categories',
        'crawlomatic_extra_tags',
        'crawlomatic_featured_img',
        'crawlomatic_post_img',
        'crawlomatic_timestamp',
        'crawlomatic_post_date',
        'crawlomatic_post_url',
        'crawlomatic_enable_pingbacks',
        'crawlomatic_comment_status',
        'crawlomatic_delete_time',
    );
    foreach ($meta_keys as $meta_key) {
        $meta[$meta_key] = get_post_meta($post->ID, $meta_key, true);
    }

    $index = $meta['crawlomatic_parent_rule'];
    if (!isset($index) || $index == '') {
        $ech = esc_html__('This is not an automatically generated post.', 'crawlomatic-multipage-scraper-post-generator');
    } else {
        // One table row: bold label on the left, already escaped value on the right.
        $row = static function ($label, $cell) {
            return '<tr><td><b>' . $label . '</b></td><td> ' . $cell . '</td></tr>';
        };

        $ech = '<table class="crf_table">';
        $ech .= $row(esc_html__('Post Parent Rule:', 'crawlomatic-multipage-scraper-post-generator'), esc_html($index));
        $ech .= $row(esc_html__('Post Original Title:', 'crawlomatic-multipage-scraper-post-generator'), esc_html($meta['crawlomatic_item_title']));

        // Optional rows in display order: meta key, label, escaping function.
        $optional_rows = array(
            array('crawlomatic_timestamp', esc_html__('Post Creation Date:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
            array('crawlomatic_extra_categories', esc_html__('Post Categories:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
            array('crawlomatic_extra_tags', esc_html__('Post Tags:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
            array('crawlomatic_featured_img', esc_html__('Featured Image:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_url'),
            array('crawlomatic_post_img', esc_html__('Post Image:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_url'),
            array('crawlomatic_post_date', esc_html__('Item Source URL Date:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
            array('crawlomatic_post_url', esc_html__('Item Source URL:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_url'),
            array('crawlomatic_enable_pingbacks', esc_html__('Pingback/Trackback Status:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
            array('crawlomatic_comment_status', esc_html__('Comment Status:', 'crawlomatic-multipage-scraper-post-generator'), 'esc_html'),
        );
        foreach ($optional_rows as $optional_row) {
            $stored = $meta[$optional_row[0]];
            if ($stored != '') {
                $ech .= $row($optional_row[1], call_user_func($optional_row[2], $stored));
            }
        }

        // The deletion time is stored as a Unix timestamp and shown in GMT.
        if ($meta['crawlomatic_delete_time'] != '') {
            $ech .= $row('Auto Delete Post:', gmdate("Y-m-d H:i:s", intval($meta['crawlomatic_delete_time'])));
        }
        $ech .= '</table><br/>';
    }
    echo $ech;
    wp_suspend_cache_addition(false);
}
// Route every permalink filter through crawlomatic_permalink_changer().
foreach (array('post_link', 'page_link', 'post_type_link', 'the_permalink') as $permalink_hook) {
    add_filter($permalink_hook, 'crawlomatic_permalink_changer', 10, 2);
}

/**
 * Permalink filter: point a post's permalink to its stored source URL.
 *
 * The link is replaced only when the plugin is enabled, the 'link_source'
 * setting is on, the post carries 'crawlomatic_change_title_link' = 1 and a
 * non-empty 'crawlomatic_post_url' meta value exists.
 *
 * @param string $link   Permalink computed by WordPress.
 * @param mixed  $postid Post ID, an object with an ID property, or anything
 *                       else to fall back to the global $post.
 * @return string The source URL, or $link unchanged.
 */
function crawlomatic_permalink_changer($link, $postid = ''){
    global $post;

    if (is_numeric($postid)) {
        $le_post_id = $postid;
    } elseif (isset($postid->ID)) {
        $le_post_id = $postid->ID;
    } else {
        $le_post_id = isset($post->ID) ? $post->ID : '';
    }
    if (empty($le_post_id)) {
        return $link;
    }

    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $plugin_enabled = isset($crawlomatic_Main_Settings['crawlomatic_enabled']) && $crawlomatic_Main_Settings['crawlomatic_enabled'] == 'on';
    if (!$plugin_enabled) {
        return $link;
    }
    $link_to_source = isset($crawlomatic_Main_Settings['link_source']) && $crawlomatic_Main_Settings['link_source'] == 'on';
    if (!$link_to_source) {
        return $link;
    }

    $change_flag = get_post_meta($le_post_id, 'crawlomatic_change_title_link', true);
    if (!(trim($change_flag) == '1')) {
        return $link;
    }
    $source_url = get_post_meta($le_post_id, 'crawlomatic_post_url', true);
    return (trim($source_url) != '') ? $source_url : $link;
}
/**
 * Store the plugin's bookkeeping meta values on a generated post.
 *
 * Some values are always written; others are skipped when the setting named
 * like the meta key is 'on'.
 *
 * @param int    $post_id                   Post receiving the meta values.
 * @param array  $post                      Data of the generated post; the fields read are listed below.
 * @param mixed  $param                     Stored as 'crawlomatic_parent_rule'.
 * @param string $featured_img              Stored as 'crawlomatic_featured_img'.
 * @param mixed  $title_url                 '1' to store the 'crawlomatic_change_title_link' flag.
 * @param string $css_cont                  Stored as 'crawlomatic_css_cont' when not empty.
 * @param mixed  $rule_unique_id            Stored as 'crawlomatic_rule_id'.
 * @param mixed  $crawlomatic_Main_Settings Plugin settings array.
 * @return void
 */
function crawlomatic_addPostMeta($post_id, $post, $param, $featured_img, $title_url, $css_cont, $rule_unique_id, $crawlomatic_Main_Settings)
{
    // Writes $post[$post_field] under $meta_key unless the setting of the same
    // name is 'on'. The post field is read only when the value is stored.
    $store_unless_disabled = static function ($meta_key, $post_field) use ($post_id, $post, $crawlomatic_Main_Settings) {
        if (isset($crawlomatic_Main_Settings[$meta_key]) && $crawlomatic_Main_Settings[$meta_key] == 'on') {
            return;
        }
        update_post_meta($post_id, $meta_key, $post[$post_field]);
    };

    update_post_meta($post_id, 'crawlomatic_parent_rule', $param);
    $store_unless_disabled('crawlomatic_enable_pingbacks', 'crawlomatic_enable_pingbacks');
    $store_unless_disabled('crawlomatic_comment_status', 'comment_status');
    $store_unless_disabled('crawlomatic_item_title', 'original_title');
    $store_unless_disabled('crawlomatic_extra_categories', 'extra_categories');
    $store_unless_disabled('crawlomatic_extra_tags', 'extra_tags');
    $store_unless_disabled('crawlomatic_post_img', 'crawlomatic_post_image');
    update_post_meta($post_id, 'crawlomatic_featured_img', $featured_img);
    $store_unless_disabled('crawlomatic_timestamp', 'crawlomatic_timestamp');
    update_post_meta($post_id, 'crawlomatic_post_url', $post['crawlomatic_post_url']);
    update_post_meta($post_id, 'crawlomatic_rule_id', $rule_unique_id);
    update_post_meta($post_id, 'crawlomatic_post_orig_url', $post['crawlomatic_post_orig_url']);
    $store_unless_disabled('crawlomatic_post_date', 'crawlomatic_post_date');

    if ($css_cont != '') {
        update_post_meta($post_id, 'crawlomatic_css_cont', $css_cont);
    }
    $delete_at = $post['auto_delete'];
    if ($delete_at != '' && is_numeric($delete_at)) {
        update_post_meta($post_id, 'crawlomatic_delete_time', intval($delete_at));
    }
    if ($title_url == '1') {
        update_post_meta($post_id, 'crawlomatic_change_title_link', '1');
    }
}
/**
 * Set the featured image of a post from an image URL or a data URI.
 *
 * Depending on the plugin settings one of three strategies is used:
 *  1. 'no_local_image' on and 'url_image' not on: a shared placeholder
 *     attachment (uploads/default_img_crawlomatic.jpg) becomes the thumbnail.
 *  2. 'url_image' on and a "Featured Image from URL" plugin active: the
 *     remote URL is registered without downloading the file.
 *  3. Otherwise the image is downloaded (or decoded from the data URI),
 *     written to the uploads directory, optionally resized, inserted as an
 *     attachment and set as the post thumbnail.
 *
 * @param string $image_url         Image URL or "data:image/...;base64,..." URI.
 * @param int    $post_id           Post that receives the thumbnail.
 * @param mixed  $use_proxy         Passed through to crawlomatic_get_web_page().
 * @param mixed  $request_delay     Passed through to crawlomatic_get_web_page().
 * @param mixed  $custom_user_agent Passed through to crawlomatic_get_web_page().
 * @param mixed  $custom_cookies    Passed through to crawlomatic_get_web_page().
 * @param mixed  $user_pass         Passed through to crawlomatic_get_web_page().
 * @return bool True when a thumbnail was set, false on any failure.
 */
function crawlomatic_generate_featured_image($image_url, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    global $wp_filesystem;
    if (!is_a($wp_filesystem, 'WP_Filesystem_Base')) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials(site_url());
        wp_filesystem($creds);
    }
    $upload_dir = wp_upload_dir();
    if (!function_exists('is_plugin_active')) {
        include_once(ABSPATH . 'wp-admin/includes/plugin.php');
    }

    if (isset($crawlomatic_Main_Settings['no_local_image']) && $crawlomatic_Main_Settings['no_local_image'] == 'on' && (!isset($crawlomatic_Main_Settings['url_image']) || $crawlomatic_Main_Settings['url_image'] != 'on')) {
        // Strategy 1: shared placeholder attachment.
        if (!crawlomatic_url_is_image($image_url)) {
            return false;
        }
        $file = $upload_dir['basedir'] . '/default_img_crawlomatic.jpg';
        if (!$wp_filesystem->exists($file)) {
            $image_data = crawlomatic_get_web_page(html_entity_decode(dirname(__FILE__) . "/images/icon.png"), '', '', $use_proxy, '', '', '', '');
            if ($image_data === false || strpos($image_data, '<Message>Access Denied</Message>') !== false) {
                return false;
            }
            if ($wp_filesystem->put_contents($file, $image_data) === false) {
                return false;
            }
        }
        // Reuse the attachment created earlier while it still exists.
        $checking_id = get_option('crawlomatic_attach_id', false);
        $need_attach = ($checking_id === false || wp_get_attachment_url($checking_id) === false);
        if ($need_attach) {
            $filename = basename(dirname(__FILE__) . "/images/icon.png");
            $wp_filetype = wp_check_filetype($filename, null);
            if ($wp_filetype['type'] == '') {
                $wp_filetype['type'] = 'image/png';
            }
            $attachment = array(
                'post_mime_type' => $wp_filetype['type'],
                'post_title' => sanitize_file_name($filename),
                'post_content' => '',
                'post_status' => 'inherit'
            );
            $attach_id = wp_insert_attachment($attachment, $file, $post_id);
            if ($attach_id === 0) {
                return false;
            }
            update_option('crawlomatic_attach_id', $attach_id);
            require_once(ABSPATH . 'wp-admin/includes/image.php');
            require_once(ABSPATH . 'wp-admin/includes/media.php');
            $attach_data = wp_generate_attachment_metadata($attach_id, $file);
            wp_update_attachment_metadata($attach_id, $attach_data);
        } else {
            $attach_id = $checking_id;
        }
        if (set_post_thumbnail($post_id, $attach_id) === false) {
            return false;
        }
        $post_title = get_the_title($post_id);
        if ($post_title != '') {
            update_post_meta($attach_id, '_wp_attachment_image_alt', $post_title);
        }
        return true;
    } elseif (isset($crawlomatic_Main_Settings['url_image']) && $crawlomatic_Main_Settings['url_image'] == 'on' && (is_plugin_active('featured-image-from-url/featured-image-from-url.php') || is_plugin_active('fifu-premium/fifu-premium.php'))) {
        // Strategy 2: remote featured image handled by the FIFU plugin.
        if (!crawlomatic_url_is_image($image_url)) {
            crawlomatic_log_to_file('Provided remote image is not valid: ' . $image_url);
            return false;
        }
        if (function_exists('fifu_dev_set_image')) {
            fifu_dev_set_image($post_id, $image_url);
        } else {
            $value = crawlomatic_get_formatted_value($image_url, '', $post_id);
            $attach_id = crawlomatic_insert_attachment_by($value);
            update_post_meta($post_id, '_thumbnail_id', $attach_id);
            update_post_meta($post_id, 'fifu_image_url', $image_url);
            update_post_meta($attach_id, '_wp_attached_file', ';' . $image_url);
            $attach = get_post($attach_id);
            if ($attach !== null) {
                // NOTE(review): 77777 presumably is the author marker FIFU uses
                // for its virtual attachments - confirm against that plugin.
                $attach->post_author = 77777;
                wp_update_post($attach);
            }
        }
        return true;
    }

    // Strategy 3: obtain the image bytes.
    if (substr($image_url, 0, 10) === "data:image") {
        $data = explode(',', $image_url);
        if (!isset($data[1])) {
            return false;
        }
        $image_data = base64_decode($data[1]);
        if ($image_data === false) {
            return false;
        }
        // From here on $image_url only serves as the source of the file name.
        preg_match('{data:image/(.*?);}', $image_url, $ex_matches);
        $image_url = isset($ex_matches[1]) ? 'image.' . $ex_matches[1] : 'image.jpg';
    } else {
        $image_data = crawlomatic_get_web_page(html_entity_decode($image_url), $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
        if ($image_data === false || strpos($image_data, '<Message>Access Denied</Message>') !== false) {
            return false;
        }
    }

    // Derive a file-system safe name from the last URL segment.
    $filename = basename($image_url);
    $filename = explode("?", $filename);
    $filename = urlencode($filename[0]);
    // '\\' is a single backslash; the former '\' literal was an unterminated
    // string and made the whole file unparsable.
    $unsafe_chars = array('%', '#', '&', '{', '}', '\\', '<', '>', '*', '/', '$', '\'', '"', ':', '@', '+', '|', '=', '`');
    $filename = str_replace($unsafe_chars, '-', $filename);
    $file_parts = pathinfo($filename);
    if (!isset($file_parts['extension']) || $file_parts['extension'] === '') {
        $filename .= '.jpg';
    }
    // The earlier '%...' randomisation pass was dropped: every '%' has already
    // been replaced above, so it could never match.
    $filename = sanitize_file_name($filename);
    if (strlen($filename) > 100) {
        $filename = substr($filename, 0, 90) . uniqid();
    }
    if (isset($crawlomatic_Main_Settings['random_image_names']) && $crawlomatic_Main_Settings['random_image_names'] == 'on') {
        $filename = uniqid() . '.jpg';
    }
    if (wp_mkdir_p($upload_dir['path'])) {
        $file = $upload_dir['path'] . '/' . $post_id . '-' . $filename;
    } else {
        $file = $upload_dir['basedir'] . '/' . $post_id . '-' . $filename;
    }
    if (!is_a($wp_filesystem, 'WP_Filesystem_Base')) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials(site_url());
        wp_filesystem($creds);
    }
    if ($wp_filesystem->exists($file)) {
        // Never overwrite an existing upload; append a unique suffix instead.
        $uid = uniqid();
        $filename .= $uid . '.jpg';
        $file .= $uid . '.jpg';
    }
    if ($wp_filesystem->put_contents($file, $image_data) === false) {
        return false;
    }

    // Optional resize according to the configured maximum dimensions.
    $resize_width = isset($crawlomatic_Main_Settings['resize_width']) ? $crawlomatic_Main_Settings['resize_width'] : '';
    $resize_height = isset($crawlomatic_Main_Settings['resize_height']) ? $crawlomatic_Main_Settings['resize_height'] : '';
    if ($resize_height !== '' || $resize_width !== '') {
        try {
            if (!class_exists('\Eventviva\ImageResize')) {
                require_once(dirname(__FILE__) . "/res/ImageResize/ImageResize.php");
            }
            $imageRes = new ImageResize($file);
            $imageRes->quality_jpg = 100;
            if ($resize_height !== '' && $resize_width !== '') {
                $imageRes->resizeToBestFit($resize_width, $resize_height, true);
            } elseif ($resize_width !== '') {
                $imageRes->resizeToWidth($resize_width, true);
            } else {
                $imageRes->resizeToHeight($resize_height, true);
            }
            $imageRes->save($file);
        } catch (Exception $e) {
            // A failed resize is logged only; the unresized file is still used.
            crawlomatic_log_to_file('Failed to resize featured image: ' . $image_url . ' to sizes ' . $resize_width . ' - ' . $resize_height . '. Exception thrown ' . esc_html($e->getMessage()) . '!');
        }
    }

    // Register the file as an attachment and make it the thumbnail.
    $wp_filetype = wp_check_filetype($filename, null);
    if ($wp_filetype['type'] == '') {
        $wp_filetype['type'] = 'image/png';
    }
    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'post_title' => sanitize_file_name($filename),
        'post_content' => '',
        'post_status' => 'inherit'
    );
    $attach_id = wp_insert_attachment($attachment, $file, $post_id);
    if ($attach_id === 0) {
        return false;
    }
    require_once(ABSPATH . 'wp-admin/includes/image.php');
    require_once(ABSPATH . 'wp-admin/includes/media.php');
    $attach_data = wp_generate_attachment_metadata($attach_id, $file);
    wp_update_attachment_metadata($attach_id, $attach_data);
    if (set_post_thumbnail($post_id, $attach_id) === false) {
        return false;
    }
    $post_title = get_the_title($post_id);
    if ($post_title != '') {
        update_post_meta($attach_id, '_wp_attachment_image_alt', $post_title);
    }
    return true;
}
/**
 * Make a post the parent of an attachment.
 *
 * @param int $post_id   Post that becomes the parent.
 * @param int $attach_id Attachment to update.
 * @return bool Always true; the result of wp_update_post() is not inspected.
 */
function crawlomatic_add_attachment_to_post($post_id, $attach_id)
{
    $parent_change = array(
        'ID' => $attach_id,
        'post_parent' => $post_id
    );
    wp_update_post($parent_change);
    return true;
}
/**
 * Download an image (or decode a data URI) and attach it to a post.
 *
 * The file is written to the uploads directory, optionally resized according
 * to the plugin settings and inserted as an attachment of $post_id. Unlike
 * crawlomatic_generate_featured_image() the post thumbnail is not changed.
 *
 * @param string $image_url         Image URL or "data:image/...;base64,..." URI.
 * @param int    $post_id           Parent post of the new attachment.
 * @param mixed  $use_proxy         Passed through to crawlomatic_get_web_page().
 * @param mixed  $request_delay     Passed through to crawlomatic_get_web_page().
 * @param mixed  $custom_user_agent Passed through to crawlomatic_get_web_page().
 * @param mixed  $custom_cookies    Passed through to crawlomatic_get_web_page().
 * @param mixed  $user_pass         Passed through to crawlomatic_get_web_page().
 * @param mixed  $counter           Appended to the post title to form the image alt text.
 * @return int|false ID of the new attachment, or false on failure.
 */
function crawlomatic_upload_attachment_media($image_url, $post_id, $use_proxy, $request_delay, $custom_user_agent, $custom_cookies, $user_pass, $counter)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    global $wp_filesystem;
    if (!is_a($wp_filesystem, 'WP_Filesystem_Base')) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials(site_url());
        wp_filesystem($creds);
    }
    $upload_dir = wp_upload_dir();
    if (!function_exists('is_plugin_active')) {
        include_once(ABSPATH . 'wp-admin/includes/plugin.php');
    }

    // Obtain the image bytes.
    if (substr($image_url, 0, 10) === "data:image") {
        $data = explode(',', $image_url);
        if (!isset($data[1])) {
            return false;
        }
        $image_data = base64_decode($data[1]);
        if ($image_data === false) {
            return false;
        }
        // From here on $image_url only serves as the source of the file name.
        preg_match('{data:image/(.*?);}', $image_url, $ex_matches);
        $image_url = isset($ex_matches[1]) ? 'image.' . $ex_matches[1] : 'image.jpg';
    } else {
        $image_data = crawlomatic_get_web_page(html_entity_decode($image_url), $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
        if ($image_data === false || strpos($image_data, '<Message>Access Denied</Message>') !== false) {
            return false;
        }
    }

    // Derive a file-system safe name from the last URL segment.
    $filename = basename($image_url);
    $filename = explode("?", $filename);
    $filename = urlencode($filename[0]);
    // '\\' is a single backslash; the former '\' literal was an unterminated
    // string and made the whole file unparsable.
    $unsafe_chars = array('%', '#', '&', '{', '}', '\\', '<', '>', '*', '/', '$', '\'', '"', ':', '@', '+', '|', '=', '`');
    $filename = str_replace($unsafe_chars, '-', $filename);
    $file_parts = pathinfo($filename);
    if (!isset($file_parts['extension']) || $file_parts['extension'] === '') {
        $filename .= '.jpg';
    }
    // The earlier '%...' randomisation pass was dropped: every '%' has already
    // been replaced above, so it could never match.
    $filename = sanitize_file_name($filename);
    if (strlen($filename) > 100) {
        $filename = substr($filename, 0, 90) . uniqid();
    }
    if (isset($crawlomatic_Main_Settings['random_image_names']) && $crawlomatic_Main_Settings['random_image_names'] == 'on') {
        $filename = uniqid() . '.jpg';
    }
    if (wp_mkdir_p($upload_dir['path'])) {
        $file = $upload_dir['path'] . '/' . $post_id . '-' . $filename;
    } else {
        $file = $upload_dir['basedir'] . '/' . $post_id . '-' . $filename;
    }
    if (!is_a($wp_filesystem, 'WP_Filesystem_Base')) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials(site_url());
        wp_filesystem($creds);
    }
    if ($wp_filesystem->exists($file)) {
        // Never overwrite an existing upload; append a unique suffix instead.
        $uid = uniqid();
        $filename .= $uid . '.jpg';
        $file .= $uid . '.jpg';
    }
    if ($wp_filesystem->put_contents($file, $image_data) === false) {
        return false;
    }

    // Optional resize according to the configured maximum dimensions.
    $resize_width = isset($crawlomatic_Main_Settings['resize_width']) ? $crawlomatic_Main_Settings['resize_width'] : '';
    $resize_height = isset($crawlomatic_Main_Settings['resize_height']) ? $crawlomatic_Main_Settings['resize_height'] : '';
    if ($resize_height !== '' || $resize_width !== '') {
        try {
            if (!class_exists('\Eventviva\ImageResize')) {
                require_once(dirname(__FILE__) . "/res/ImageResize/ImageResize.php");
            }
            $imageRes = new ImageResize($file);
            $imageRes->quality_jpg = 100;
            if ($resize_height !== '' && $resize_width !== '') {
                $imageRes->resizeToBestFit($resize_width, $resize_height, true);
            } elseif ($resize_width !== '') {
                $imageRes->resizeToWidth($resize_width, true);
            } else {
                $imageRes->resizeToHeight($resize_height, true);
            }
            $imageRes->save($file);
        } catch (Exception $e) {
            // A failed resize is logged only; the unresized file is still used.
            crawlomatic_log_to_file('Failed to resize gallery image: ' . $image_url . ' to sizes ' . $resize_width . ' - ' . $resize_height . '. Exception thrown ' . esc_html($e->getMessage()) . '!');
        }
    }

    // Register the file as an attachment of the post.
    $wp_filetype = wp_check_filetype($filename, null);
    if ($wp_filetype['type'] == '') {
        $wp_filetype['type'] = 'image/png';
    }
    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'post_title' => sanitize_file_name($filename),
        'post_content' => '',
        'post_status' => 'inherit'
    );
    $attach_id = wp_insert_attachment($attachment, $file, $post_id);
    if ($attach_id === 0) {
        return false;
    }
    require_once(ABSPATH . 'wp-admin/includes/image.php');
    require_once(ABSPATH . 'wp-admin/includes/media.php');
    $attach_data = wp_generate_attachment_metadata($attach_id, $file);
    wp_update_attachment_metadata($attach_id, $attach_data);
    $post_title = get_the_title($post_id);
    if ($post_title != '') {
        update_post_meta($attach_id, '_wp_attachment_image_alt', $post_title . ' ' . $counter);
    }
    return $attach_id;
}
/**
 * Insert one row into the posts table from a pre-built VALUES tuple.
 *
 * The tuple is concatenated into the statement as given, so it must already
 * be safe SQL (see crawlomatic_get_formatted_value()).
 *
 * @param string $value Parenthesised VALUES tuple matching the column list below.
 * @return int Insert ID reported by $wpdb after the statement ran.
 */
function crawlomatic_insert_attachment_by($value) {
    global $wpdb;
    $posts_table = $wpdb->prefix . "posts";
    $statement = "
INSERT INTO " . $posts_table . " (post_author, guid, post_title, post_mime_type, post_type, post_status, post_parent, post_date, post_date_gmt, post_modified, post_modified_gmt, post_content, post_excerpt, to_ping, pinged, post_content_filtered)
VALUES " . $value;
    $wpdb->get_results($statement);
    return $wpdb->insert_id;
}
/**
 * Build the SQL VALUES tuple for a remote-image attachment row.
 *
 * The tuple is consumed by crawlomatic_insert_attachment_by(). The URL comes
 * from scraped pages, so every dynamic part is escaped before it is placed
 * into the SQL text.
 *
 * @param string $url         Image URL, stored as the attachment guid.
 * @param string $alt         Alternative text, stored as post_title (single quotes are removed).
 * @param int    $post_parent ID of the post the attachment belongs to.
 * @return string Parenthesised VALUES tuple.
 */
function crawlomatic_get_formatted_value($url, $alt, $post_parent) {
    $safe_url = esc_sql($url);
    $safe_alt = esc_sql(str_replace("'", "", $alt));
    $safe_parent = intval($post_parent);
    return "(77777, '" . $safe_url . "', '" . $safe_alt . "', 'image/jpeg', 'attachment', 'inherit', '" . $safe_parent . "', now(), now(), now(), now(), '', '', '', '', '')";
}
/**
 * Store a copy of a remote image (or data URI) in the uploads directory.
 *
 * Before downloading, the configured delay between requests is honoured: the
 * global 'request_delay' setting applies unless the $request_delay argument
 * provides a valid value; both accept a number or a "min,max" pair.
 *
 * @param string $image_url         Image URL or "data:image/...;base64,..." URI.
 * @param mixed  $use_proxy         Passed through to crawlomatic_get_web_page().
 * @param mixed  $request_delay     Per-rule delay; also passed to crawlomatic_get_web_page().
 * @param mixed  $custom_user_agent Passed through to crawlomatic_get_web_page().
 * @param mixed  $user_pass         Passed through to crawlomatic_get_web_page().
 * @param mixed  $custom_cookies    Passed through to crawlomatic_get_web_page().
 * @param mixed  $att_id            Receives the new attachment ID (by reference)
 *                                  unless the 'no_local_attach' setting is on.
 * @return string|false URL of the local copy, or false on failure.
 */
function crawlomatic_copy_image_locally($image_url, $use_proxy, $request_delay, $custom_user_agent, $user_pass, $custom_cookies, &$att_id)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    global $wp_filesystem;
    if (!is_a($wp_filesystem, 'WP_Filesystem_Base')) {
        include_once(ABSPATH . 'wp-admin/includes/file.php');
        $creds = request_filesystem_credentials(site_url());
        wp_filesystem($creds);
    }
    $upload_dir = wp_upload_dir();

    // Resolve the delay: the global setting first, then the per-call value,
    // which wins whenever it is valid.
    $delay = '';
    $delay_sources = array(
        isset($crawlomatic_Main_Settings['request_delay']) ? $crawlomatic_Main_Settings['request_delay'] : '',
        $request_delay,
    );
    foreach ($delay_sources as $raw_delay) {
        if (!($raw_delay != '')) {
            continue;
        }
        if (stristr($raw_delay, ',') !== false) {
            $tempo = explode(',', $raw_delay);
            if (isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0]))) {
                $delay = rand(intval(trim($tempo[0])), intval(trim($tempo[1])));
            }
        } elseif (is_numeric(trim($raw_delay))) {
            $delay = intval(trim($raw_delay));
        }
    }
    if ($delay != '' && is_numeric($delay)) {
        // Drop the cached copy so the stored timestamp is read fresh.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false) {
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0) {
                if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                    crawlomatic_log_to_file('Delay between requests set(2), waiting ' . ($sleep_time / 1000) . ' ms');
                }
                // Upper bound keeps a corrupt timestamp from blocking the run.
                if ($sleep_time < 21600000) {
                    usleep($sleep_time);
                }
            }
        }
    }

    // Obtain the image bytes.
    if (substr($image_url, 0, 10) === "data:image") {
        $data = explode(',', $image_url);
        if (!isset($data[1])) {
            return false;
        }
        $image_data = base64_decode($data[1]);
        if ($image_data === false) {
            return false;
        }
        // From here on $image_url only serves as the source of the file name.
        preg_match('{data:image/(.*?);}', $image_url, $ex_matches);
        $image_url = isset($ex_matches[1]) ? 'image.' . $ex_matches[1] : 'image.jpg';
    } else {
        $image_data = crawlomatic_get_web_page(html_entity_decode($image_url), $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, '', '', $request_delay);
        if ($image_data === false || strpos($image_data, '<Message>Access Denied</Message>') !== false) {
            return false;
        }
    }

    // Derive a file-system safe name from the last URL segment.
    $filename = basename($image_url);
    $filename = explode("?", $filename);
    $filename = urlencode($filename[0]);
    // '\\' is a single backslash; the former '\' literal was an unterminated
    // string and made the whole file unparsable.
    $unsafe_chars = array('%', '#', '&', '{', '}', '\\', '<', '>', '*', '/', '$', '\'', '"', ':', '@', '+', '|', '=', '`');
    $filename = str_replace($unsafe_chars, '-', $filename);

    // Make sure there is always a usable extension. Previously a name without
    // any extension was kept as it was and left the extension undefined for
    // the code below.
    $file_parts = pathinfo($filename);
    if (!isset($file_parts['extension'])) {
        $filename .= '.jpg';
        $extension = 'jpg';
    } elseif ($file_parts['extension'] === '') {
        // The name already ends with a dot ("file."), so only the suffix is added.
        $filename .= 'jpg';
        $extension = 'jpg';
    } else {
        $extension = $file_parts['extension'];
    }

    if (isset($crawlomatic_Main_Settings['random_image_names']) && $crawlomatic_Main_Settings['random_image_names'] == 'on') {
        $unid = uniqid();
        $file = $upload_dir['basedir'] . '/' . $unid . '.' . $extension;
        $ret_path = $upload_dir['baseurl'] . '/' . $unid . '.' . $extension;
    } elseif (wp_mkdir_p($upload_dir['path'] . '/localimages')) {
        $file = $upload_dir['path'] . '/localimages/' . $filename;
        $ret_path = $upload_dir['url'] . '/localimages/' . $filename;
    } else {
        $file = $upload_dir['basedir'] . '/' . $filename;
        $ret_path = $upload_dir['baseurl'] . '/' . $filename;
    }

    if ($wp_filesystem->exists($file)) {
        if (isset($crawlomatic_Main_Settings['no_local_dup']) && $crawlomatic_Main_Settings['no_local_dup'] == 'on') {
            // Reuse the copy that is already on disk.
            return $ret_path;
        }
        // Pick a suffix that is free and apply it to the path AND the URL, so
        // the returned URL always points at the file written below.
        do {
            $suffix = uniqid() . '.' . $extension;
        } while ($wp_filesystem->exists($file . $suffix));
        $file .= $suffix;
        $ret_path .= $suffix;
    }
    if ($wp_filesystem->put_contents($file, $image_data) === false) {
        return false;
    }

    if (!isset($crawlomatic_Main_Settings['no_local_attach']) || $crawlomatic_Main_Settings['no_local_attach'] != 'on') {
        // Register the copy in the media library and report its ID.
        $wp_filetype = wp_check_filetype($file, null);
        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_title' => sanitize_file_name($file),
            'post_content' => '',
            'post_status' => 'inherit'
        );
        $screens_attach_id = wp_insert_attachment($attachment, $file);
        require_once(ABSPATH . 'wp-admin/includes/image.php');
        require_once(ABSPATH . 'wp-admin/includes/media.php');
        $attach_data = wp_generate_attachment_metadata($screens_attach_id, $file);
        wp_update_attachment_metadata($screens_attach_id, $attach_data);
        $att_id = $screens_attach_id;
    }
    return $ret_path;
}
/**
 * Guess whether a URL points to an image, based on the extension of its path.
 *
 * Spaces are percent-encoded before validation. A URL whose path carries no
 * extension at all is accepted (returns true); otherwise the lower-cased
 * extension must be present in the allow-list below.
 *
 * @param string $url URL to inspect.
 * @return bool True when the URL is valid and looks like an image, false otherwise.
 */
function crawlomatic_url_is_image( $url ) {
    $url = str_replace(' ', '%20', $url);
    if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
        return FALSE;
    }
    $ext = array( 'jpeg', 'jpg', 'gif', 'png', 'jpe', 'tif', 'tiff', 'svg', 'ico' , 'webp', 'dds', 'heic', 'psd', 'pspimage', 'tga', 'thm', 'yuv', 'ai', 'eps', 'php');
    // parse_url() yields null when the URL has no path component (e.g. "http://host");
    // cast to string so pathinfo() never receives null (deprecated since PHP 8.1).
    $info = pathinfo( (string) parse_url( $url, PHP_URL_PATH ) );
    if ( ! isset( $info['extension'] ) ) {
        // No extension to judge by: give the URL the benefit of the doubt.
        return true;
    }
    return in_array( strtolower( $info['extension'] ), $ext, TRUE );
}
/**
 * Filter an array down to the entries whose KEY matches a regular expression.
 *
 * @param string $pattern PCRE pattern applied to each key.
 * @param mixed  $input   Array to filter; any non-array yields an empty array.
 * @param int    $flags   Flags forwarded to preg_grep() (e.g. PREG_GREP_INVERT).
 * @return array Matching key => value pairs, in the order they appear in $input.
 */
function crawlomatic_preg_grep_keys( $pattern, $input, $flags = 0 )
{
    $matched = array();
    if(is_array($input))
    {
        $wanted_keys = preg_grep( $pattern, array_keys( $input ), $flags );
        foreach ( $wanted_keys as $name )
        {
            $matched[$name] = $input[$name];
        }
    }
    return $matched;
}
/**
 * Filter callback: swap an attachment URL for the external featured image
 * recorded on the current post, when one exists.
 *
 * Looks through the current post's custom fields for keys matching
 * "*_featured_img" and returns the first value that is a valid URL.
 *
 * @param string $att_url Attachment URL supplied by the filter.
 * @param int    $att_id  Attachment ID supplied by the filter (not used here).
 * @return string The external image URL, or $att_url unchanged when none is found.
 */
function crawlomatic_replace_attachment_url($att_url, $att_id) {
    $current_post = get_the_ID();
    // Read the meta without letting it be added to the object cache.
    wp_suspend_cache_addition(true);
    $custom_fields = get_post_custom($current_post);
    wp_suspend_cache_addition(false);
    $candidates = crawlomatic_preg_grep_keys('#.+?_featured_img#i', $custom_fields);
    foreach($candidates as $values)
    {
        if(!isset($values[0]) || $values[0] == '' || !filter_var($values[0], FILTER_VALIDATE_URL))
        {
            continue;
        }
        return $values[0];
    }
    return $att_url;
}
/**
 * Filter callback: swap the attachment image source for the external featured
 * image recorded on the current post, when one exists.
 *
 * @param array|false $image  Image data supplied by the filter.
 * @param int         $att_id Attachment ID supplied by the filter (not used here).
 * @param mixed       $size   Requested size supplied by the filter (not used here).
 * @return array|false array(url, 0, 0, false) for the first valid "*_featured_img"
 *                     meta value, otherwise $image unchanged.
 */
function crawlomatic_replace_attachment_image_src($image, $att_id, $size)
{
    $current_post = get_the_ID();
    // Read the meta without letting it be added to the object cache.
    wp_suspend_cache_addition(true);
    $custom_fields = get_post_custom($current_post);
    wp_suspend_cache_addition(false);
    $candidates = crawlomatic_preg_grep_keys('#.+?_featured_img#i', $custom_fields);
    foreach($candidates as $values)
    {
        if(!isset($values[0]) || $values[0] == '' || !filter_var($values[0], FILTER_VALIDATE_URL))
        {
            continue;
        }
        // Width and height are reported as 0 and the image is flagged as not resized.
        return array($values[0], 0, 0, false);
    }
    return $image;
}
/**
 * Filter callback: replace the post thumbnail markup with an <img> tag that
 * points at the external featured image stored in the post's custom fields.
 *
 * @param string $html     Thumbnail HTML supplied by the filter.
 * @param int    $post_id  Post whose thumbnail is being rendered.
 * @param int    $thumb_id Thumbnail attachment ID.
 * @return string Replacement <img> markup for the first valid "*_featured_img"
 *                meta value, otherwise $html unchanged.
 */
function crawlomatic_thumbnail_external_replace( $html, $post_id, $thumb_id )
{
    // Read the meta without letting it be added to the object cache.
    wp_suspend_cache_addition(true);
    $custom_fields = get_post_custom($post_id);
    wp_suspend_cache_addition(false);
    $candidates = crawlomatic_preg_grep_keys('#.+?_featured_img#i', $custom_fields);
    foreach($candidates as $values)
    {
        $usable = isset($values[0]) && $values[0] != '' && filter_var($values[0], FILTER_VALIDATE_URL);
        if(!$usable)
        {
            continue;
        }
        $alt_text = get_post_field( 'post_title', $post_id ) . ' ' . esc_html__( 'thumbnail', 'crawlomatic-multipage-scraper-post-generator' );
        $attachment = get_post($thumb_id);
        // Let themes/plugins adjust the attributes exactly as they would for a real attachment.
        $attributes = apply_filters( 'wp_get_attachment_image_attributes', array( 'alt' => $alt_text ), $attachment , 'thumbnail');
        $attributes = array_map( 'esc_attr', $attributes );
        $markup = sprintf( '<img src="%s"', esc_url($values[0]) );
        foreach ( $attributes as $attr_name => $attr_value ) {
            $markup .= " " . esc_html($attr_name) . '="' . esc_attr($attr_value) . '"';
        }
        return $markup . ' />';
    }
    return $html;
}
/**
 * Difference between two date strings, interpreted in the blog timezone.
 *
 * NOTE: despite the function name, the value returned is the difference in
 * MINUTES (seconds / 60), positive when $date1 is later than $date2.
 *
 * @param string $date1 Date/time string accepted by DateTime.
 * @param string $date2 Date/time string accepted by DateTime.
 * @return int|float ($date1 - $date2) expressed in minutes.
 */
function crawlomatic_hour_diff($date1, $date2)
{
    $first = new DateTime($date1, crawlomatic_get_blog_timezone());
    $second = new DateTime($date2, crawlomatic_get_blog_timezone());
    $elapsed_seconds = (int) $first->format('U') - (int) $second->format('U');
    return $elapsed_seconds / 60;
}
/**
 * Shift a date string by a number of hours, in the blog timezone.
 *
 * @param string     $date Date/time string accepted by DateTime.
 * @param int|string $hour Hours to add (interpolated into a "<n> hours" modifier).
 * @return string The shifted date as exposed by the DateTime object's "date"
 *                property, or $date unchanged if that property is unavailable.
 */
function crawlomatic_add_hour($date, $hour)
{
    $moment = new DateTime($date, crawlomatic_get_blog_timezone());
    $moment->modify("$hour hours");
    // Casting a DateTime to array exposes its 'date', 'timezone_type' and 'timezone' fields.
    $fields = (array)$moment;
    if (array_key_exists('date', $fields)) {
        return $fields['date'];
    }
    return $date;
}
/**
 * Difference between two date strings, interpreted in the blog timezone.
 *
 * NOTE: despite the function name, the value returned is the difference in
 * SECONDS, positive when $date1 is later than $date2.
 *
 * @param string $date1 Date/time string accepted by DateTime.
 * @param string $date2 Date/time string accepted by DateTime.
 * @return int ($date1 - $date2) expressed in seconds.
 */
function crawlomatic_minute_diff($date1, $date2)
{
    $first = new DateTime($date1, crawlomatic_get_blog_timezone());
    $second = new DateTime($date2, crawlomatic_get_blog_timezone());
    $first_stamp = (int) $first->format('U');
    $second_stamp = (int) $second->format('U');
    return $first_stamp - $second_stamp;
}
/**
 * Shift a date string by a number of minutes, in the blog timezone.
 *
 * @param string     $date   Date/time string accepted by DateTime.
 * @param int|string $minute Minutes to add (interpolated into a "<n> minutes" modifier).
 * @return string The shifted date as exposed by the DateTime object's "date"
 *                property, or $date unchanged if that property is unavailable.
 */
function crawlomatic_add_minute($date, $minute)
{
    $moment = new DateTime($date, crawlomatic_get_blog_timezone());
    $moment->modify("$minute minutes");
    // Casting a DateTime to array exposes its 'date', 'timezone_type' and 'timezone' fields.
    $fields = (array)$moment;
    if (array_key_exists('date', $fields)) {
        return $fields['date'];
    }
    return $date;
}
/**
 * Enqueue a stylesheet under a handle derived from $cont.
 *
 * @param string     $src  Stylesheet URL.
 * @param int|string $cont Suffix appended to the "crawlomatic-thumbnail-css-" handle.
 * @return void
 */
function crawlomatic_wp_custom_css_files($src, $cont)
{
    // The third argument of wp_enqueue_style() is the dependency list (an array of
    // handles). The previous code passed __FILE__ there, which is not a valid
    // dependency list; the stylesheet has no dependencies.
    wp_enqueue_style('crawlomatic-thumbnail-css-' . $cont, $src, array());
}
/**
 * Current (or otherwise described) date/time in the blog timezone.
 *
 * @param string $param Date/time description accepted by DateTime; defaults to 'now'.
 * @return string The value of the DateTime object's "date" property, or '' if
 *                that property is unavailable.
 */
function crawlomatic_get_date_now($param = 'now')
{
    $moment = new DateTime($param, crawlomatic_get_blog_timezone());
    // Casting a DateTime to array exposes its 'date', 'timezone_type' and 'timezone' fields.
    $fields = (array)$moment;
    if (array_key_exists('date', $fields)) {
        return $fields['date'];
    }
    return '';
}
/**
 * Resolve a comma-separated list of term names to term IDs, creating any term
 * that does not exist yet.
 *
 * @param string   $taxonomy    Taxonomy the terms belong to.
 * @param int|null $parent      Parent term used for the lookup and for newly created terms
 *                              (null means "no parent", i.e. 0, on insert).
 * @param string   $terms_str   Comma-separated term names.
 * @param string   $remove_cats Comma-separated term names to ignore (case-insensitive);
 *                              '' disables the exclusion list.
 * @return array IDs of the existing or newly created terms, in input order.
 */
function crawlomatic_create_terms($taxonomy, $parent, $terms_str, $remove_cats)
{
    // Normalise the exclusion list once instead of trimming on every comparison.
    $excluded = array();
    if($remove_cats != '')
    {
        foreach(explode(',', $remove_cats) as $excluded_name)
        {
            $excluded[] = trim($excluded_name);
        }
    }
    $insert_parent = ($parent === null) ? 0 : $parent;
    $categories = array();
    foreach (explode(',', $terms_str) as $term) {
        $term = trim($term);
        if($term === '')
        {
            // An empty name can neither be looked up nor inserted; skip it up front.
            continue;
        }
        // Dedicated flag: the loop variable must not double as the "skip" marker.
        $is_excluded = false;
        foreach($excluded as $excluded_name)
        {
            if(strcasecmp($excluded_name, $term) == 0)
            {
                $is_excluded = true;
                break;
            }
        }
        if($is_excluded)
        {
            continue;
        }
        $res = term_exists($term, $taxonomy, $parent);
        // term_exists() may return null, 0 or a scalar ID; only an array carries
        // 'term_id'. Guarding with is_array() avoids count() on a scalar, which
        // throws a TypeError on PHP 8.
        if (is_array($res) && isset($res['term_id'])) {
            $categories[] = $res['term_id'];
            continue;
        }
        $new_term = wp_insert_term($term, $taxonomy, array(
            'parent' => $insert_parent
        ));
        if (!is_wp_error( $new_term ) && is_array($new_term) && isset($new_term['term_id'])) {
            $categories[] = $new_term['term_id'];
        }
    }
    return $categories;
}
/**
 * Build a plain-text excerpt from HTML content.
 *
 * Strips tags, trims to 55 words with no "more" suffix, then removes
 * leading/trailing commas.
 *
 * @param string $the_content HTML content.
 * @return string Excerpt text.
 */
function crawlomatic_getExcerpt($the_content)
{
    $plain_text = crawlomatic_strip_html_tags($the_content);
    $shortened = crawlomatic_wp_trim_words($plain_text, 55, '');
    return trim($shortened, ',');
}
/**
 * Convert HTML content to plain text.
 *
 * Strips tags and runs the result through the word trimmer with a limit of
 * 999999 words.
 *
 * @param string $the_content HTML content.
 * @return string Plain-text content.
 */
function crawlomatic_getPlainContent($the_content)
{
    $plain_text = crawlomatic_strip_html_tags($the_content);
    return crawlomatic_wp_trim_words($plain_text, 999999);
}
/**
 * Render an <img> tag for an item image.
 *
 * @param string $img        Image URL; an empty value produces no markup.
 * @param string $just_title Text used for the alt attribute.
 * @return string The <img> markup, or '' when $img is empty.
 */
function crawlomatic_getItemImage($img, $just_title)
{
    if($img != '')
    {
        return '<img src="' . esc_url($img) . '" alt="' . esc_html($just_title) . '" />';
    }
    return '';
}
/**
 * Render the "Read More" link for an item.
 *
 * @param string|null $url       Target URL; null produces no markup.
 * @param string      $read_more Link label. A single space (' ') disables the button;
 *                               '' falls back to the 'read_more_text' plugin setting,
 *                               then to the translated "Read More".
 * @return string The anchor markup, or '' when the button is disabled or $url is null.
 */
function crawlomatic_getReadMoreButton($url, $read_more)
{
    if($read_more == ' '){
        return '';
    }
    if (!isset($url)) {
        return '';
    }
    if($read_more == '')
    {
        $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
        if (isset($crawlomatic_Main_Settings['read_more_text']) && $crawlomatic_Main_Settings['read_more_text'] != '') {
            $read_more = $crawlomatic_Main_Settings['read_more_text'];
        }
        else
        {
            $read_more = esc_html__('Read More', 'crawlomatic-multipage-scraper-post-generator');
        }
    }
    // The URL originates from crawled content, so it must be escaped before being
    // placed in an attribute; previously it was concatenated raw, which allowed a
    // quote in the URL to break out of href.
    return '<a rel="nofollow noopener" href="' . esc_url($url) . '" class="button purchase" target="_blank">' . esc_html($read_more) . '</a>';
}
// Run crawlomatic_create_taxonomy() on 'init' with priority 0.
add_action('init', 'crawlomatic_create_taxonomy', 0);
// Load the plugin's block editor styles and scripts when the block editor enqueues its assets.
add_action( 'enqueue_block_editor_assets', 'crawlomatic_enqueue_block_editor_assets' );
/**
 * Enqueue the stylesheet and the three block scripts used in the block editor.
 *
 * @return void
 */
function crawlomatic_enqueue_block_editor_assets() {
    wp_register_style('crawlomatic-browser-style', plugins_url('styles/crawlomatic-browser.css', __FILE__), false, '1.0.0');
    wp_enqueue_style('crawlomatic-browser-style');
    // Script handle => path relative to this plugin file; enqueued in this order.
    $editor_scripts = array(
        'crawlomatic-display-block-js' => 'scripts/display-posts.js',
        'crawlomatic-list-block-js' => 'scripts/list-posts.js',
        'crawlomatic-crawler-gut-js' => 'scripts/crawler.js'
    );
    foreach ($editor_scripts as $handle => $relative_path) {
        wp_enqueue_script(
            $handle,
            plugins_url( $relative_path, __FILE__ ),
            array(
                'wp-blocks',
                'wp-i18n',
                'wp-element',
            ),
            '1.0.0'
        );
    }
}
/**
 * 'init' callback: registers the plugin's shortcode, optional featured-image
 * filters, blocks, preview image size and the hidden
 * 'coderevolution_post_source' taxonomy.
 *
 * @return void
 */
function crawlomatic_create_taxonomy()
{
add_shortcode('crawlomatic-scraper', array( 'Crawlomatic_Shortcode_Scraper', 'shortcode' ));
// Make shortcodes work inside text widgets.
add_filter('widget_text', 'do_shortcode');
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
if (isset($crawlomatic_Main_Settings['crawlomatic_enabled']) && $crawlomatic_Main_Settings['crawlomatic_enabled'] === 'on') {
// The external featured image filters are only hooked when 'no_local_image' is on
// and 'url_image' is not.
if (isset($crawlomatic_Main_Settings['no_local_image']) && $crawlomatic_Main_Settings['no_local_image'] == 'on' && (!isset($crawlomatic_Main_Settings['url_image']) || $crawlomatic_Main_Settings['url_image'] != 'on')) {
add_filter('wp_get_attachment_url', 'crawlomatic_replace_attachment_url', 10, 2);
add_filter('wp_get_attachment_image_src', 'crawlomatic_replace_attachment_image_src', 10, 3);
// NOTE(review): 6 accepted args are requested, but the callback declares only 3 parameters.
add_filter('post_thumbnail_html', 'crawlomatic_thumbnail_external_replace', 10, 6);
}
}
// register_block_type() is only available on WordPress versions with block support.
if ( function_exists( 'register_block_type' ) ) {
register_block_type( 'crawlomatic-multipage-scraper-post-generator/crawlomatic-display', array(
'render_callback' => 'crawlomatic_display_posts_shortcode',
) );
register_block_type( 'crawlomatic-multipage-scraper-post-generator/crawlomatic-list', array(
'render_callback' => 'crawlomatic_list_posts',
) );
register_block_type( 'crawlomatic-multipage-scraper-post-generator/crawlomatic-scraper', array(
'render_callback' => array( 'Crawlomatic_Shortcode_Scraper', 'shortcode' ),
) );
}
add_image_size( 'crawlomatic_preview_image', 260, 146);
// Register the taxonomy only once; another plugin may already have registered it.
if(!taxonomy_exists('coderevolution_post_source'))
{
$labels = array(
'name' => _x('Post Source', 'taxonomy general name', 'crawlomatic-multipage-scraper-post-generator'),
'singular_name' => _x('Post Source', 'taxonomy singular name', 'crawlomatic-multipage-scraper-post-generator'),
'search_items' => esc_html__('Search Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'popular_items' => esc_html__('Popular Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'all_items' => esc_html__('All Post Sources', 'crawlomatic-multipage-scraper-post-generator'),
'parent_item' => null,
'parent_item_colon' => null,
'edit_item' => esc_html__('Edit Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'update_item' => esc_html__('Update Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'add_new_item' => esc_html__('Add New Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'new_item_name' => esc_html__('New Post Source Name', 'crawlomatic-multipage-scraper-post-generator'),
'separate_items_with_commas' => esc_html__('Separate Post Source with commas', 'crawlomatic-multipage-scraper-post-generator'),
'add_or_remove_items' => esc_html__('Add or remove Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'choose_from_most_used' => esc_html__('Choose from the most used Post Source', 'crawlomatic-multipage-scraper-post-generator'),
'not_found' => esc_html__('No Post Sources found.', 'crawlomatic-multipage-scraper-post-generator'),
'menu_name' => esc_html__('Post Source', 'crawlomatic-multipage-scraper-post-generator')
);
// Non-public, non-hierarchical taxonomy with no admin UI and no rewrite rules.
$args = array(
'hierarchical' => false,
'public' => false,
'show_ui' => false,
'show_in_menu' => false,
'description' => 'Post Source',
'labels' => $labels,
'show_admin_column' => true,
'update_count_callback' => '_update_post_term_count',
'rewrite' => false
);
// Attach the taxonomy to posts, pages and every public non-builtin post type.
$add_post_type = array(
'post',
'page'
);
$xargs = array(
'public' => true,
'_builtin' => false
);
$output = 'names';
$operator = 'and';
$post_types = get_post_types( $xargs, $output, $operator );
if ( $post_types )
{
foreach ( $post_types as $post_type ) {
$add_post_type[] = $post_type;
}
}
register_taxonomy('coderevolution_post_source', $add_post_type, $args);
// On the front end, answer any archive query for this taxonomy with a 404.
add_action('pre_get_posts', function($qry) {
if (is_admin()) return;
if (is_tax('coderevolution_post_source')){
$qry->set_404();
}
});
}
}
/**
 * Check whether PhantomJS can be executed on this server.
 *
 * Runs "<phantom binary> -h" through the shell and looks for "Usage" in the
 * output. The binary path comes from the 'phantom_path' plugin setting, or
 * defaults to "phantomjs".
 *
 * @return int 1 when PhantomJS responded, 0 when it did not,
 *             -1 when the shell function does not exist,
 *             -2 when the shell function is listed in disable_functions.
 */
function crawlomatic_testPhantom()
{
    if(!function_exists('shell' . '_exec')) {
        return -1;
    }
    // disable_functions entries are often written "a, b, c"; trim each entry so a
    // name preceded by a space is still recognised.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if(in_array('shell' . '_exec', $disabled))
    {
        return -2;
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (isset($crawlomatic_Main_Settings['phantom_path']) && $crawlomatic_Main_Settings['phantom_path'] != '')
    {
        $phantomjs_comm = $crawlomatic_Main_Settings['phantom_path'] . ' ';
    }
    else
    {
        $phantomjs_comm = 'phantomjs ';
    }
    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        crawlomatic_log_to_file('PhantomJS TEST command: ' . $phantomjs_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($phantomjs_comm . '-h 2>&1');
    // The shell call yields null/false when nothing was produced; only search real output.
    if(is_string($cmdResult) && stristr($cmdResult, 'Usage') !== false)
    {
        return 1;
    }
    return 0;
}
/**
 * Check whether the bundled Puppeteer + Tor script can run on this server.
 *
 * Executes res/puppeteer/torcheck.js through node against https://example.com
 * and classifies the output; every failure reason is written to the plugin log.
 *
 * @return int 1 when the script printed "TOR OK!", 0 on any failure,
 *             -1 when the shell function does not exist,
 *             -2 when the shell function is listed in disable_functions.
 */
function crawlomatic_testTor()
{
    if(!function_exists('shell' . '_exec')) {
        return -1;
    }
    // disable_functions entries are often written "a, b, c"; trim each entry so a
    // name preceded by a space is still recognised.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if(in_array('shell' . '_exec', $disabled))
    {
        return -2;
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $custom_user_agent = 'default';
    $custom_cookies = 'default';
    $user_pass = 'default';
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $phantomjs_proxcomm = '"null"';
    $url = 'https://example.com';
    // Every argument here is a constant or an integer, so plain quoting is sufficient.
    $puppeteer_comm = 'node ';
    $puppeteer_comm .= '"' . dirname(__FILE__) . '/res/puppeteer/torcheck.js" "' . $url . '" ' . $phantomjs_proxcomm . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . $phantomjs_timeout . '" "0"';
    $puppeteer_comm .= ' 2>&1';
    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        crawlomatic_log_to_file('Puppeteer-Tor TEST command: ' . $puppeteer_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($puppeteer_comm);
    if($cmdResult === NULL || $cmdResult == '')
    {
        crawlomatic_log_to_file('puppeteer-tor did not return usable info for: ' . $url);
        return 0;
    }
    if(trim($cmdResult) === 'timeout')
    {
        crawlomatic_log_to_file('puppeteer timed out while getting page (tor): ' . $url. ' - please increase timeout in Main Settings');
        return 0;
    }
    if(stristr($cmdResult, 'sh: node: command not found') !== false || stristr($cmdResult, 'throw err;') !== false)
    {
        crawlomatic_log_to_file('nodeJS not found, please install it on your server');
        return 0;
    }
    if(stristr($cmdResult, 'sh: puppeteer: command not found') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found, please install it on your server (also tor)');
        return 0;
    }
    if(stristr($cmdResult, 'Error: Cannot find module \'puppeteer\'') !== false)
    {
        crawlomatic_log_to_file('puppeteer module not found, please install it on your server');
        return 0;
    }
    if(stristr($cmdResult, 'CRAWLOMATIC NOT USING TOR!') !== false)
    {
        crawlomatic_log_to_file('Tor was not able to be used by Crawlomatic/Puppeteer. Please install Tor on your server!');
        return 0;
    }
    // A "<script path>:" fragment in the output indicates the script itself failed.
    if(stristr($cmdResult, 'res/puppeteer/torcheck.js:') !== false)
    {
        crawlomatic_log_to_file('torcheck failed to run, error: ' . $cmdResult);
        return 0;
    }
    if(stristr($cmdResult, 'TOR OK!') !== false)
    {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('Tor OK!');
        }
        return 1;
    }
    crawlomatic_log_to_file('Tor returned unknown result: ' . $cmdResult);
    return 0;
}
/**
 * Check whether the bundled Puppeteer script can run on this server.
 *
 * Executes res/puppeteer/puppeteer.js through node against https://example.com
 * and expects "Example Domain" in the output; every failure reason is written
 * to the plugin log.
 *
 * @return int 1 when the page was fetched, 0 on any failure,
 *             -1 when the shell function does not exist,
 *             -2 when the shell function is listed in disable_functions.
 */
function crawlomatic_testPuppeteer()
{
    if(!function_exists('shell' . '_exec')) {
        return -1;
    }
    // disable_functions entries are often written "a, b, c"; trim each entry so a
    // name preceded by a space is still recognised.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if(in_array('shell' . '_exec', $disabled))
    {
        return -2;
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $custom_user_agent = 'default';
    $custom_cookies = 'default';
    $user_pass = 'default';
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $url = 'https://example.com';
    $phantomjs_proxcomm = '"null"';
    // Every argument here is a constant or an integer, so plain quoting is sufficient.
    $puppeteer_comm = 'node ';
    $puppeteer_comm .= '"' . dirname(__FILE__) . '/res/puppeteer/puppeteer.js" "' . $url . '" ' . $phantomjs_proxcomm . ' "' . $custom_user_agent . '" "' . $custom_cookies . '" "' . $user_pass . '" "' . $phantomjs_timeout . '"';
    $puppeteer_comm .= ' 2>&1';
    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        crawlomatic_log_to_file('Puppeteer TEST command: ' . $puppeteer_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($puppeteer_comm);
    if($cmdResult === NULL || $cmdResult == '')
    {
        crawlomatic_log_to_file('puppeteer did not return usable info for: ' . $url);
        return 0;
    }
    if(trim($cmdResult) === 'timeout')
    {
        crawlomatic_log_to_file('puppeteer timed out while getting page: ' . $url. ' - please increase timeout in Main Settings');
        return 0;
    }
    if(stristr($cmdResult, 'sh: node: command not found') !== false || stristr($cmdResult, 'throw err;') !== false)
    {
        crawlomatic_log_to_file('nodeJS not found, please install it on your server');
        return 0;
    }
    if(stristr($cmdResult, 'sh: puppeteer: command not found') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found, please install it on your server');
        return 0;
    }
    // A "<script path>:" fragment in the output indicates the script itself failed.
    if(stristr($cmdResult, 'res/puppeteer/puppeteer.js:') !== false)
    {
        crawlomatic_log_to_file('puppeteercheck failed to run, error: ' . $cmdResult);
        return 0;
    }
    if(stristr($cmdResult, 'Example Domain') !== false)
    {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('Puppeteer OK!');
        }
        return 1;
    }
    crawlomatic_log_to_file('Puppeteer returned unknown result: ' . $cmdResult);
    return 0;
}
/**
 * Fetch a page through the bundled Puppeteer + Tor script (res/puppeteer/tor.js).
 *
 * Honours the request delay (global 'request_delay' setting, overridden by
 * $request_delay), runs node through the shell and classifies the output.
 * Failures are written to the plugin log.
 *
 * @param string $url               Page to download.
 * @param string $custom_cookies    Cookies passed to the script ('' => "default").
 * @param string $custom_user_agent User agent; 'none' => "default", '' => a random one.
 * @param mixed  $use_proxy         Not used by this function.
 * @param string $user_pass         Credentials passed to the script ('' => "default").
 * @param string $timeout           Extra argument passed to the script ('' => "default").
 * @param string $request_delay     Delay in ms, or "min,max" for a random delay in that range.
 * @param string $scripter          JavaScript passed to the script ('' => "default").
 * @param string $local_storage     Local storage data passed to the script ('' => "default").
 * @return string|false Script output on success, false on any failure.
 */
function crawlomatic_get_page_Tor($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '')
{
    if($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if(!function_exists('shell' . '_exec')) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('shel' . 'l_exec not found!');
        }
        return false;
    }
    // disable_functions entries are often written "a, b, c"; trim each entry so a
    // name preceded by a space is still recognised.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if(in_array('shell' . '_exec', $disabled))
    {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('shel' . 'l_exec disabled');
        }
        return false;
    }
    // Delay from the global setting first; a valid per-call delay overrides it below.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '') {
        if(stristr($crawlomatic_Main_Settings['request_delay'], ',') !== false)
        {
            $tempo = explode(',', $crawlomatic_Main_Settings['request_delay']);
            if(isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0])))
            {
                $delay = rand(trim($tempo[0]), trim($tempo[1]));
            }
        }
        else
        {
            if(is_numeric(trim($crawlomatic_Main_Settings['request_delay'])))
            {
                $delay = intval(trim($crawlomatic_Main_Settings['request_delay']));
            }
        }
    }
    if ($request_delay != '')
    {
        if(stristr($request_delay, ',') !== false)
        {
            $tempo = explode(',', $request_delay);
            if(isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0])))
            {
                $delay = rand(trim($tempo[0]), trim($tempo[1]));
            }
        }
        else
        {
            if(is_numeric(trim($request_delay)))
            {
                $delay = intval(trim($request_delay));
            }
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));
    // Records when this request finished so the next call can space itself out.
    $stamp_request = function () use ($has_delay) {
        if($has_delay)
        {
            update_option('crawlomatic_last_time', time());
        }
    };
    if($has_delay)
    {
        // Drop the cached option so the timestamp written by another request is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        // $delay is in milliseconds; usleep() takes microseconds.
        if($last_time !== false && intval(((intval($last_time) - time()) * 1000 + $delay ) * 1000) > 0)
        {
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay ) * 1000);
            if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
            {
                crawlomatic_log_to_file('Delay between requests set(3), waiting ' . ($sleep_time/1000) . ' ms');
            }
            if($sleep_time < 21600000)
            {
                usleep($sleep_time);
            }
        }
    }
    if($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if($user_pass == '')
    {
        $user_pass = 'default';
    }
    // Fixed inverted test: previously every supplied timeout was replaced by
    // 'default' while an empty one was passed through as an empty argument.
    if($timeout == '')
    {
        $timeout = 'default';
    }
    if($scripter == '')
    {
        $scripter = 'default';
    }
    if($local_storage == '')
    {
        $local_storage = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $phantomjs_proxcomm = '"null"';
    // The URL, cookies, user agent and scripts come from crawled pages or user
    // input, so each one is passed through escapeshellarg() rather than being
    // wrapped in double quotes, where $, backticks and quotes would still be
    // interpreted by the shell.
    // NOTE(review): escapeshellarg() can drop non-ASCII bytes when the process
    // locale is not UTF-8 - confirm LC_CTYPE on the target servers.
    $puppeteer_comm = 'node ';
    $puppeteer_comm .= escapeshellarg(dirname(__FILE__) . '/res/puppeteer/tor.js')
        . ' ' . escapeshellarg((string) $url)
        . ' ' . $phantomjs_proxcomm
        . ' ' . escapeshellarg((string) $custom_user_agent)
        . ' ' . escapeshellarg((string) $custom_cookies)
        . ' ' . escapeshellarg((string) $user_pass)
        . ' ' . escapeshellarg((string) $phantomjs_timeout)
        . ' "1"'
        . ' ' . escapeshellarg((string) $timeout)
        . ' ' . escapeshellarg((string) $scripter)
        . ' ' . escapeshellarg((string) $local_storage);
    $puppeteer_comm .= ' 2>&1';
    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        crawlomatic_log_to_file('Puppeteer-Tor command: ' . $puppeteer_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($puppeteer_comm);
    if($cmdResult === NULL || $cmdResult == '')
    {
        crawlomatic_log_to_file('puppeteer-tor did not return usable info for: ' . $url);
        $stamp_request();
        return false;
    }
    if(trim($cmdResult) === 'timeout')
    {
        crawlomatic_log_to_file('puppeteer timed out while getting page (tor): ' . $url. ' - please increase timeout in Main Settings');
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'Error: Cannot find module \'puppeteer\'') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found on server: ' . $cmdResult);
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'sh: node: command not found') !== false || stristr($cmdResult, 'throw err;') !== false)
    {
        crawlomatic_log_to_file('nodeJS not found, please install it on your server');
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'sh: puppeteer: command not found') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found, please install it on your server (also tor)');
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'CRAWLOMATIC NOT USING TOR!') !== false)
    {
        crawlomatic_log_to_file('Tor was not able to be used by Crawlomatic/Puppeteer. Please install Tor on your server!');
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'process.on(\'unhandledRejection\', up => { throw up })') !== false)
    {
        crawlomatic_log_to_file('puppeteer failed to download resource: ' . $url . ' - error: ' . $cmdResult);
        $stamp_request();
        return false;
    }
    if(stristr($cmdResult, 'Unhandled Rejection, reason: { TimeoutError') !== false)
    {
        crawlomatic_log_to_file('puppeteer failed to download resource: ' . $url . ' - timeout error: ' . $cmdResult);
        $stamp_request();
        return false;
    }
    // A "<script path>:" fragment in the output indicates the script itself failed.
    if(stristr($cmdResult, 'res/puppeteer/tor.js:') !== false)
    {
        crawlomatic_log_to_file('tor failed to run, error: ' . $cmdResult);
        // Record the request time here as well, like every other exit path.
        $stamp_request();
        return false;
    }
    $stamp_request();
    return $cmdResult;
}
/**
 * Fetch a rendered page through the HeadlessBrowserAPI Puppeteer endpoint.
 *
 * Requires the 'headlessbrowserapi_key' plugin setting. Honours the request
 * delay (global 'request_delay' setting, overridden by $request_delay), calls
 * the remote API and validates its JSON answer. Failures are written to the
 * plugin log.
 *
 * @param string $url               Page to download.
 * @param string $custom_cookies    Cookies sent to the API ('' => "default").
 * @param string $custom_user_agent User agent; 'none' => "default", '' => a random one.
 * @param mixed  $use_proxy         '1' to forward the 'proxy_url'/'proxy_auth' settings.
 * @param string $user_pass         Credentials sent to the API ('' => "default").
 * @param string $timeout           Sent as the API's "sleep" parameter when not empty.
 * @param string $request_delay     Delay in ms, or "min,max" for a random delay in that range.
 * @param string $scripter          JavaScript sent as "jsexec" when not empty.
 * @param string $local_storage     Data sent as "localstorage" when not empty.
 * @param string $auto_captcha      '1' to request captcha solving (also doubles the HTTP timeout).
 * @param string $enable_adblock    '1' to request ad blocking.
 * @param string $clickelement      Value sent as "clickelement" when not empty.
 * @return string|false The returned HTML wrapped in <html><body>…</body></html>, or false on failure.
 */
function crawlomatic_get_page_PuppeteerAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '', $auto_captcha = '', $enable_adblock = '', $clickelement = '')
{
    if($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['headlessbrowserapi_key']) || trim($crawlomatic_Main_Settings['headlessbrowserapi_key']) == '')
    {
        crawlomatic_log_to_file('You need to add your HeadlessBrowserAPI key in the plugin\'s \'Main Settings\' before you can use this feature.');
        return false;
    }
    // Delay from the global setting first; a valid per-call delay overrides it below.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '') {
        if(stristr($crawlomatic_Main_Settings['request_delay'], ',') !== false)
        {
            $tempo = explode(',', $crawlomatic_Main_Settings['request_delay']);
            if(isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0])))
            {
                $delay = rand(trim($tempo[0]), trim($tempo[1]));
            }
        }
        else
        {
            if(is_numeric(trim($crawlomatic_Main_Settings['request_delay'])))
            {
                $delay = intval(trim($crawlomatic_Main_Settings['request_delay']));
            }
        }
    }
    if ($request_delay != '')
    {
        if(stristr($request_delay, ',') !== false)
        {
            $tempo = explode(',', $request_delay);
            if(isset($tempo[1]) && is_numeric(trim($tempo[1])) && is_numeric(trim($tempo[0])))
            {
                $delay = rand(trim($tempo[0]), trim($tempo[1]));
            }
        }
        else
        {
            if(is_numeric(trim($request_delay)))
            {
                $delay = intval(trim($request_delay));
            }
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));
    // Records when this request finished so the next call can space itself out.
    $stamp_request = function () use ($has_delay) {
        if($has_delay)
        {
            update_option('crawlomatic_last_time', time());
        }
    };
    if($has_delay)
    {
        // Drop the cached option so the timestamp written by another request is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        // $delay is in milliseconds; usleep() takes microseconds.
        if($last_time !== false && intval(((intval($last_time) - time()) * 1000 + $delay ) * 1000) > 0)
        {
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay ) * 1000);
            if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
            {
                crawlomatic_log_to_file('Delay between requests set(4), waiting ' . ($sleep_time/1000) . ' ms');
            }
            if($sleep_time < 21600000)
            {
                usleep($sleep_time);
            }
        }
    }
    if($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if($user_pass == '')
    {
        $user_pass = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '')
    {
        $proxy_url = $crawlomatic_Main_Settings['proxy_url'];
        if(isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $proxy_auth = $crawlomatic_Main_Settings['proxy_auth'];
        }
        else
        {
            $proxy_auth = 'default';
        }
    }
    else
    {
        $proxy_url = 'default';
        $proxy_auth = 'default';
    }
    // Every query value is URL-encoded, including the API key.
    $za_api_url = 'https://headlessbrowserapi.com/apis/scrape/v1/puppeteer?apikey=' . urlencode(trim($crawlomatic_Main_Settings['headlessbrowserapi_key'])) . '&url=' . urlencode($url) . '&custom_user_agent=' . urlencode($custom_user_agent) . '&custom_cookies=' . urlencode($custom_cookies) . '&user_pass=' . urlencode($user_pass) . '&timeout=' . urlencode((string) $phantomjs_timeout) . '&proxy_url=' . urlencode($proxy_url) . '&proxy_auth=' . urlencode($proxy_auth);
    if($timeout != '')
    {
        $za_api_url .= '&sleep=' . urlencode($timeout);
    }
    if(trim($scripter) != '')
    {
        $za_api_url .= '&jsexec=' . urlencode(trim($scripter));
    }
    if(trim($local_storage) != '')
    {
        $za_api_url .= '&localstorage=' . urlencode(trim($local_storage));
    }
    $api_timeout = 120;
    if(trim($auto_captcha) == '1')
    {
        // Captcha solving takes longer; give the HTTP request twice the time.
        $api_timeout += 120;
        $za_api_url .= '&solvecaptcha=' . trim($auto_captcha);
    }
    if(trim($enable_adblock) == '1')
    {
        $za_api_url .= '&enableadblock=' . trim($enable_adblock);
    }
    if(trim($clickelement) != '')
    {
        // Previously appended raw: a value containing '#', '&' or spaces
        // truncated or corrupted the query string.
        $za_api_url .= '&clickelement=' . urlencode(trim($clickelement));
    }
    $args = array(
        'timeout' => $api_timeout,
        'redirection' => 10,
        'blocking' => true,
        'compress' => false,
        'decompress' => true,
        'sslverify' => false,
        'stream' => false
    );
    $ret_data = wp_remote_get($za_api_url, $args);
    $response_code = wp_remote_retrieve_response_code( $ret_data );
    $response_message = wp_remote_retrieve_response_message( $ret_data );
    if ( 200 != $response_code ) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging']))
        {
            crawlomatic_log_to_file('Failed to get response from HeadlessBrowserAPI: ' . $za_api_url . ' code: ' . $response_code . ' message: ' . $response_message);
            if(isset($ret_data->errors['http_request_failed']))
            {
                foreach($ret_data->errors['http_request_failed'] as $errx)
                {
                    crawlomatic_log_to_file('Error message: ' . html_entity_decode($errx));
                }
            }
        }
        $stamp_request();
        return false;
    }
    $raw_body = wp_remote_retrieve_body( $ret_data );
    $cmdResult = json_decode($raw_body, true);
    // json_decode() signals failure with null, not false; anything that is not an
    // array cannot carry the expected fields either.
    if(!is_array($cmdResult))
    {
        crawlomatic_log_to_file('Failed to decode response from HeadlessBrowserAPI (puppeteer): ' . $za_api_url . ' - ' . print_r($raw_body, true));
        $stamp_request();
        return false;
    }
    if(isset($cmdResult['apicalls']))
    {
        update_option('headless_calls', esc_html($cmdResult['apicalls']));
    }
    if(isset($cmdResult['error']))
    {
        crawlomatic_log_to_file('An error occurred while getting content from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult['error'], true));
        $stamp_request();
        return false;
    }
    if(!isset($cmdResult['html']))
    {
        crawlomatic_log_to_file('Malformed data imported from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult, true));
        $stamp_request();
        return false;
    }
    $stamp_request();
    return '<html><body>' . $cmdResult['html'] . '</body></html>';
}
/**
 * Requests a screenshot of $url from the HeadlessBrowserAPI "screenshot" endpoint.
 *
 * Requests are throttled: when a delay is configured (plugin setting
 * 'request_delay' or the $request_delay override), the function sleeps until
 * that many milliseconds have passed since the timestamp stored in the
 * 'crawlomatic_last_time' option, and stores a new timestamp after the request.
 *
 * @param string $url               Page to capture.
 * @param string $custom_cookies    Sent as 'custom_cookies' ('' is sent as 'default').
 * @param string $custom_user_agent Sent as 'custom_user_agent' ('' is sent as 'default').
 * @param string $use_proxy         '1' forwards the 'proxy_url' / 'proxy_auth' plugin settings.
 * @param string $user_pass         Sent as 'user_pass' ('' is sent as 'default').
 * @param string $timeout           Not used by this function; kept for signature compatibility.
 * @param string $request_delay     Delay in milliseconds, or "min,max" for a random delay. Overrides the plugin setting when numeric.
 * @param string $scripter          Sent as 'jsexec' when not empty.
 * @param string $local_storage     Sent as 'localstorage' when not empty.
 * @param string $h                 Sent as 'height' ('' becomes '0').
 * @param string $w                 Sent as 'width' ('' becomes '1920').
 * @param string $auto_captcha      '1' adds 'solvecaptcha=1' and raises the HTTP timeout from 120 to 240 seconds.
 * @param string $enable_adblock    '1' adds 'enableadblock=1'.
 * @param string $clickelement      Sent (URL-encoded) as 'clickelement' when not empty.
 * @return string|false Raw response body on success, false on any failure.
 */
function crawlomatic_get_screenshot_PuppeteerAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '', $h = '0', $w = '1920', $auto_captcha = '', $enable_adblock = '', $clickelement = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['headlessbrowserapi_key']) || trim($crawlomatic_Main_Settings['headlessbrowserapi_key']) == '')
    {
        crawlomatic_log_to_file('You need to add your HeadlessBrowserAPI key in the plugin\'s \'Main Settings\' before you can use this feature.');
        return false;
    }
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);

    // Turns "500" or "200,800" into an integer number of milliseconds;
    // returns null when the value is not numeric.
    $parse_delay = function ($raw) {
        $raw = (string) $raw;
        if (strpos($raw, ',') !== false)
        {
            $bounds = explode(',', $raw);
            if (is_numeric(trim($bounds[0])) && is_numeric(trim($bounds[1])))
            {
                return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
            }
            return null;
        }
        return is_numeric(trim($raw)) ? intval(trim($raw)) : null;
    };

    // The per-call delay wins over the global setting, but only when it parses.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '')
    {
        $parsed = $parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    if ($request_delay != '')
    {
        $parsed = $parse_delay($request_delay);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));

    if ($has_delay)
    {
        // Drop the cached copy so the timestamp written by other requests is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false)
        {
            // Computed once: evaluating time() twice could yield a negative
            // value for usleep() when the clock ticks between the two reads.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0)
            {
                if ($detailed_logging)
                {
                    crawlomatic_log_to_file('Delay between requests set(5), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // $sleep_time is in microseconds; waits at or above this cap are skipped.
                if ($sleep_time < 21600000)
                {
                    usleep($sleep_time);
                }
            }
        }
    }

    // Empty values are sent to the API as the literal string 'default'.
    if ($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if ($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if ($user_pass == '')
    {
        $user_pass = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $proxy_url = 'default';
    $proxy_auth = 'default';
    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '')
    {
        $proxy_url = $crawlomatic_Main_Settings['proxy_url'];
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $proxy_auth = $crawlomatic_Main_Settings['proxy_auth'];
        }
    }
    if ($h == '')
    {
        $h = '0';
    }
    if ($w == '')
    {
        $w = '1920';
    }

    $za_api_url = 'https://headlessbrowserapi.com/apis/scrape/v1/screenshot?apikey=' . trim($crawlomatic_Main_Settings['headlessbrowserapi_key'])
        . '&url=' . urlencode($url)
        . '&custom_user_agent=' . urlencode($custom_user_agent)
        . '&custom_cookies=' . urlencode($custom_cookies)
        . '&user_pass=' . urlencode($user_pass)
        . '&timeout=' . urlencode($phantomjs_timeout)
        . '&proxy_url=' . urlencode($proxy_url)
        . '&proxy_auth=' . urlencode($proxy_auth)
        . '&height=' . urlencode($h)
        . '&width=' . urlencode($w);
    if (trim($scripter) != '')
    {
        $za_api_url .= '&jsexec=' . urlencode(trim($scripter));
    }
    if (trim($local_storage) != '')
    {
        $za_api_url .= '&localstorage=' . urlencode(trim($local_storage));
    }
    $api_timeout = 120;
    if (trim($auto_captcha) == '1')
    {
        $api_timeout += 120;
        $za_api_url .= '&solvecaptcha=' . trim($auto_captcha);
    }
    if (trim($enable_adblock) == '1')
    {
        $za_api_url .= '&enableadblock=' . trim($enable_adblock);
    }
    if (trim($clickelement) != '')
    {
        // Selectors routinely contain '#', '&', '[' or spaces; unencoded they
        // would truncate or corrupt the query string.
        $za_api_url .= '&clickelement=' . urlencode(trim($clickelement));
    }

    // NOTE(review): 'sslverify' => false disables certificate validation for a
    // request that carries the API key; kept as-is for compatibility.
    $args = array(
        'timeout'     => $api_timeout,
        'redirection' => 10,
        'blocking'    => true,
        'compress'    => false,
        'decompress'  => true,
        'sslverify'   => false,
        'stream'      => false
    );
    $ret_data = wp_remote_get($za_api_url, $args);
    $response_code = wp_remote_retrieve_response_code( $ret_data );
    $response_message = wp_remote_retrieve_response_message( $ret_data );
    if ($has_delay)
    {
        update_option('crawlomatic_last_time', time());
    }
    if ( 200 != $response_code )
    {
        if ($detailed_logging)
        {
            crawlomatic_log_to_file('Failed to get response from HeadlessBrowserAPI: ' . $za_api_url . ' code: ' . $response_code . ' message: ' . $response_message);
            if (isset($ret_data->errors['http_request_failed']))
            {
                foreach ($ret_data->errors['http_request_failed'] as $errx)
                {
                    crawlomatic_log_to_file('Error message: ' . html_entity_decode($errx));
                }
            }
        }
        return false;
    }
    $cmdResult = wp_remote_retrieve_body( $ret_data );

    // The body is a string, so the call counter can only be read after JSON
    // decoding (a string-offset isset() is never true). Non-JSON bodies simply
    // decode to null and are left untouched.
    $decoded = json_decode($cmdResult, true);
    if (is_array($decoded) && isset($decoded['apicalls']))
    {
        update_option('headless_calls', esc_html($decoded['apicalls']));
    }
    if (strstr($cmdResult, '"error"') !== false)
    {
        crawlomatic_log_to_file('Failed to decode response from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult, true));
        return false;
    }
    return $cmdResult;
}
/**
 * Downloads the rendered HTML of $url through the HeadlessBrowserAPI "tor" endpoint.
 *
 * Requests are throttled: when a delay is configured (plugin setting
 * 'request_delay' or the $request_delay override), the function sleeps until
 * that many milliseconds have passed since the timestamp stored in the
 * 'crawlomatic_last_time' option, and stores a new timestamp after the request.
 *
 * @param string $url               Page to download.
 * @param string $custom_cookies    Sent as 'custom_cookies' ('' is sent as 'default').
 * @param string $custom_user_agent 'none' sends 'default'; '' picks one via crawlomatic_get_random_user_agent().
 * @param string $use_proxy         '1' forwards the 'proxy_url' / 'proxy_auth' plugin settings.
 * @param string $user_pass         Sent as 'user_pass' ('' is sent as 'default').
 * @param string $timeout           Sent as 'sleep' when not empty.
 * @param string $request_delay     Delay in milliseconds, or "min,max" for a random delay. Overrides the plugin setting when numeric.
 * @param string $scripter          Sent as 'jsexec' when not empty.
 * @param string $local_storage     Sent as 'localstorage' when not empty.
 * @param string $auto_captcha      '1' adds 'solvecaptcha=1' and raises the HTTP timeout from 120 to 240 seconds.
 * @param string $enable_adblock    '1' adds 'enableadblock=1'.
 * @param string $clickelement      Sent (URL-encoded) as 'clickelement' when not empty.
 * @return string|false The returned 'html' value wrapped in <html><body>…</body></html>, or false on failure.
 */
function crawlomatic_get_page_TorAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '', $auto_captcha = '', $enable_adblock = '', $clickelement = '')
{
    if ($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif ($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['headlessbrowserapi_key']) || trim($crawlomatic_Main_Settings['headlessbrowserapi_key']) == '')
    {
        crawlomatic_log_to_file('You need to add your HeadlessBrowserAPI key in the plugin\'s \'Main Settings\' before you can use this feature.');
        return false;
    }
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);

    // Turns "500" or "200,800" into an integer number of milliseconds;
    // returns null when the value is not numeric.
    $parse_delay = function ($raw) {
        $raw = (string) $raw;
        if (strpos($raw, ',') !== false)
        {
            $bounds = explode(',', $raw);
            if (is_numeric(trim($bounds[0])) && is_numeric(trim($bounds[1])))
            {
                return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
            }
            return null;
        }
        return is_numeric(trim($raw)) ? intval(trim($raw)) : null;
    };

    // The per-call delay wins over the global setting, but only when it parses.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '')
    {
        $parsed = $parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    if ($request_delay != '')
    {
        $parsed = $parse_delay($request_delay);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));

    if ($has_delay)
    {
        // Drop the cached copy so the timestamp written by other requests is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false)
        {
            // Computed once: evaluating time() twice could yield a negative
            // value for usleep() when the clock ticks between the two reads.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0)
            {
                if ($detailed_logging)
                {
                    crawlomatic_log_to_file('Delay between requests set(6), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // $sleep_time is in microseconds; waits at or above this cap are skipped.
                if ($sleep_time < 21600000)
                {
                    usleep($sleep_time);
                }
            }
        }
    }

    // Empty values are sent to the API as the literal string 'default'.
    if ($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if ($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if ($user_pass == '')
    {
        $user_pass = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $proxy_url = 'default';
    $proxy_auth = 'default';
    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '')
    {
        $proxy_url = $crawlomatic_Main_Settings['proxy_url'];
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $proxy_auth = $crawlomatic_Main_Settings['proxy_auth'];
        }
    }

    $za_api_url = 'https://headlessbrowserapi.com/apis/scrape/v1/tor?apikey=' . trim($crawlomatic_Main_Settings['headlessbrowserapi_key'])
        . '&url=' . urlencode($url)
        . '&custom_user_agent=' . urlencode($custom_user_agent)
        . '&custom_cookies=' . urlencode($custom_cookies)
        . '&user_pass=' . urlencode($user_pass)
        . '&timeout=' . urlencode($phantomjs_timeout)
        . '&proxy_url=' . urlencode($proxy_url)
        . '&proxy_auth=' . urlencode($proxy_auth);
    if ($timeout != '')
    {
        $za_api_url .= '&sleep=' . urlencode($timeout);
    }
    if (trim($scripter) != '')
    {
        $za_api_url .= '&jsexec=' . urlencode(trim($scripter));
    }
    if (trim($local_storage) != '')
    {
        $za_api_url .= '&localstorage=' . urlencode(trim($local_storage));
    }
    $api_timeout = 120;
    if (trim($auto_captcha) == '1')
    {
        $api_timeout += 120;
        $za_api_url .= '&solvecaptcha=' . trim($auto_captcha);
    }
    if (trim($enable_adblock) == '1')
    {
        $za_api_url .= '&enableadblock=' . trim($enable_adblock);
    }
    if (trim($clickelement) != '')
    {
        // Selectors routinely contain '#', '&', '[' or spaces; unencoded they
        // would truncate or corrupt the query string.
        $za_api_url .= '&clickelement=' . urlencode(trim($clickelement));
    }

    // NOTE(review): 'sslverify' => false disables certificate validation for a
    // request that carries the API key; kept as-is for compatibility.
    $args = array(
        'timeout'     => $api_timeout,
        'redirection' => 10,
        'blocking'    => true,
        'compress'    => false,
        'decompress'  => true,
        'sslverify'   => false,
        'stream'      => false
    );
    $ret_data = wp_remote_get($za_api_url, $args);
    $response_code = wp_remote_retrieve_response_code( $ret_data );
    $response_message = wp_remote_retrieve_response_message( $ret_data );
    // Every outcome below counts as "a request was made", so the throttle
    // timestamp is stored once here instead of on each exit path.
    if ($has_delay)
    {
        update_option('crawlomatic_last_time', time());
    }
    if ( 200 != $response_code )
    {
        if ($detailed_logging)
        {
            crawlomatic_log_to_file('Failed to get response from HeadlessBrowserAPI: ' . $za_api_url . ' code: ' . $response_code . ' message: ' . $response_message);
            if (isset($ret_data->errors['http_request_failed']))
            {
                foreach ($ret_data->errors['http_request_failed'] as $errx)
                {
                    crawlomatic_log_to_file('Error message: ' . html_entity_decode($errx));
                }
            }
        }
        return false;
    }
    $raw_body = wp_remote_retrieve_body( $ret_data );

    // json_decode() signals failure with null, not false; anything that is not
    // a decoded object/array cannot carry the expected keys either.
    $cmdResult = json_decode($raw_body, true);
    if (!is_array($cmdResult))
    {
        crawlomatic_log_to_file('Failed to decode response from HeadlessBrowserAPI (tor): ' . $za_api_url . ' - ' . print_r($raw_body, true));
        return false;
    }
    if (isset($cmdResult['apicalls']))
    {
        update_option('headless_calls', esc_html($cmdResult['apicalls']));
    }
    if (isset($cmdResult['error']))
    {
        crawlomatic_log_to_file('An error occurred while getting content from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult['error'], true));
        return false;
    }
    if (!isset($cmdResult['html']))
    {
        crawlomatic_log_to_file('Malformed data imported from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult, true));
        return false;
    }
    return '<html><body>' . $cmdResult['html'] . '</body></html>';
}
/**
 * Downloads the rendered HTML of $url through the HeadlessBrowserAPI "phantomjs" endpoint.
 *
 * Requests are throttled: when a delay is configured (plugin setting
 * 'request_delay' or the $request_delay override), the function sleeps until
 * that many milliseconds have passed since the timestamp stored in the
 * 'crawlomatic_last_time' option, and stores a new timestamp after the request.
 *
 * @param string $url               Page to download.
 * @param string $custom_cookies    Sent as 'custom_cookies' ('' is sent as 'default').
 * @param string $custom_user_agent 'none' sends 'default'; '' picks one via crawlomatic_get_random_user_agent().
 * @param string $use_proxy         '1' forwards the 'proxy_url' / 'proxy_auth' plugin settings.
 * @param string $user_pass         Sent as 'user_pass' ('' is sent as 'default').
 * @param string $timeout           Sent as 'sleep' when not empty.
 * @param string $request_delay     Delay in milliseconds, or "min,max" for a random delay. Overrides the plugin setting when numeric.
 * @param string $scripter          Sent as 'jsexec' when not empty.
 * @param string $local_storage     Sent as 'localstorage' when not empty.
 * @return string|false The returned 'html' value wrapped in <html><body>…</body></html>, or false on failure.
 */
function crawlomatic_get_page_PhantomJSAPI($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '')
{
    if ($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif ($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['headlessbrowserapi_key']) || trim($crawlomatic_Main_Settings['headlessbrowserapi_key']) == '')
    {
        crawlomatic_log_to_file('You need to add your HeadlessBrowserAPI key in the plugin\'s \'Main Settings\' before you can use this feature.');
        return false;
    }
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);

    // Turns "500" or "200,800" into an integer number of milliseconds;
    // returns null when the value is not numeric.
    $parse_delay = function ($raw) {
        $raw = (string) $raw;
        if (strpos($raw, ',') !== false)
        {
            $bounds = explode(',', $raw);
            if (is_numeric(trim($bounds[0])) && is_numeric(trim($bounds[1])))
            {
                return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
            }
            return null;
        }
        return is_numeric(trim($raw)) ? intval(trim($raw)) : null;
    };

    // The per-call delay wins over the global setting, but only when it parses.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '')
    {
        $parsed = $parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    if ($request_delay != '')
    {
        $parsed = $parse_delay($request_delay);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));

    if ($has_delay)
    {
        // Drop the cached copy so the timestamp written by other requests is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false)
        {
            // Computed once: evaluating time() twice could yield a negative
            // value for usleep() when the clock ticks between the two reads.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0)
            {
                if ($detailed_logging)
                {
                    crawlomatic_log_to_file('Delay between requests set(7), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // $sleep_time is in microseconds; waits at or above this cap are skipped.
                if ($sleep_time < 21600000)
                {
                    usleep($sleep_time);
                }
            }
        }
    }

    // Empty values are sent to the API as the literal string 'default'.
    if ($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if ($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if ($user_pass == '')
    {
        $user_pass = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = 'default';
    }
    $proxy_url = 'default';
    $proxy_auth = 'default';
    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '')
    {
        $proxy_url = $crawlomatic_Main_Settings['proxy_url'];
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $proxy_auth = $crawlomatic_Main_Settings['proxy_auth'];
        }
    }

    $za_api_url = 'https://headlessbrowserapi.com/apis/scrape/v1/phantomjs?apikey=' . trim($crawlomatic_Main_Settings['headlessbrowserapi_key'])
        . '&url=' . urlencode($url)
        . '&custom_user_agent=' . urlencode($custom_user_agent)
        . '&custom_cookies=' . urlencode($custom_cookies)
        . '&user_pass=' . urlencode($user_pass)
        . '&timeout=' . urlencode($phantomjs_timeout)
        . '&proxy_url=' . urlencode($proxy_url)
        . '&proxy_auth=' . urlencode($proxy_auth);
    if ($timeout != '')
    {
        $za_api_url .= '&sleep=' . urlencode($timeout);
    }
    if (trim($scripter) != '')
    {
        $za_api_url .= '&jsexec=' . urlencode(trim($scripter));
    }
    if (trim($local_storage) != '')
    {
        $za_api_url .= '&localstorage=' . urlencode(trim($local_storage));
    }

    // NOTE(review): 'sslverify' => false disables certificate validation for a
    // request that carries the API key; kept as-is for compatibility.
    $args = array(
        'timeout'     => 120,
        'redirection' => 10,
        'blocking'    => true,
        'compress'    => false,
        'decompress'  => true,
        'sslverify'   => false,
        'stream'      => false
    );
    $ret_data = wp_remote_get($za_api_url, $args);
    $response_code = wp_remote_retrieve_response_code( $ret_data );
    $response_message = wp_remote_retrieve_response_message( $ret_data );
    // Every outcome below counts as "a request was made", so the throttle
    // timestamp is stored once here instead of on each exit path.
    if ($has_delay)
    {
        update_option('crawlomatic_last_time', time());
    }
    if ( 200 != $response_code )
    {
        if ($detailed_logging)
        {
            crawlomatic_log_to_file('Failed to get response from HeadlessBrowserAPI: ' . $za_api_url . ' code: ' . $response_code . ' message: ' . $response_message);
            if (isset($ret_data->errors['http_request_failed']))
            {
                foreach ($ret_data->errors['http_request_failed'] as $errx)
                {
                    crawlomatic_log_to_file('Error message: ' . html_entity_decode($errx));
                }
            }
        }
        return false;
    }
    $raw_body = wp_remote_retrieve_body( $ret_data );

    // json_decode() signals failure with null, not false; anything that is not
    // a decoded object/array cannot carry the expected keys either.
    $cmdResult = json_decode($raw_body, true);
    if (!is_array($cmdResult))
    {
        crawlomatic_log_to_file('Failed to decode response from HeadlessBrowserAPI (phantomjs): ' . $za_api_url . ' - ' . print_r($raw_body, true));
        return false;
    }
    if (isset($cmdResult['apicalls']))
    {
        update_option('headless_calls', esc_html($cmdResult['apicalls']));
    }
    if (isset($cmdResult['error']))
    {
        crawlomatic_log_to_file('An error occurred while getting content from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult['error'], true));
        return false;
    }
    if (!isset($cmdResult['html']))
    {
        crawlomatic_log_to_file('Malformed data imported from HeadlessBrowserAPI: ' . $za_api_url . ' - ' . print_r($cmdResult, true));
        return false;
    }
    return '<html><body>' . $cmdResult['html'] . '</body></html>';
}
/**
 * Downloads a page by running the bundled res/puppeteer/puppeteer.js script with a local `node` binary.
 *
 * Requests are throttled: when a delay is configured (plugin setting
 * 'request_delay' or the $request_delay override), the function sleeps until
 * that many milliseconds have passed since the timestamp stored in the
 * 'crawlomatic_last_time' option, and stores a new timestamp after the run.
 *
 * @param string $url               Page to download (passed as the script's first argument).
 * @param string $custom_cookies    Passed to the script ('' becomes 'default').
 * @param string $custom_user_agent 'none' passes 'default'; '' picks one via crawlomatic_get_random_user_agent().
 * @param string $use_proxy         '1' passes a random entry of the comma separated 'proxy_url' setting (plus matching 'proxy_auth').
 * @param string $user_pass         Passed to the script ('' becomes 'default').
 * @param string $timeout           Passed to the script ('' becomes 'default').
 * @param string $request_delay     Delay in milliseconds, or "min,max" for a random delay. Overrides the plugin setting when numeric.
 * @param string $scripter          Passed to the script ('' becomes 'default').
 * @param string $local_storage     Passed to the script ('' becomes 'default').
 * @return string|false Combined stdout/stderr of the script on success, false on failure.
 */
function crawlomatic_get_page_Puppeteer($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $timeout = '', $request_delay = '', $scripter = '', $local_storage = '')
{
    if ($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif ($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    if (!function_exists('shell' . '_exec'))
    {
        if ($detailed_logging)
        {
            crawlomatic_log_to_file('shel' . 'l_exec not found!');
        }
        return false;
    }
    // php.ini lists are often written "a, b, c"; trim so the match is reliable.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if (in_array('shell' . '_exec', $disabled))
    {
        if ($detailed_logging)
        {
            crawlomatic_log_to_file('shel' . 'l_exec disabled');
        }
        return false;
    }

    // Turns "500" or "200,800" into an integer number of milliseconds;
    // returns null when the value is not numeric.
    $parse_delay = function ($raw) {
        $raw = (string) $raw;
        if (strpos($raw, ',') !== false)
        {
            $bounds = explode(',', $raw);
            if (is_numeric(trim($bounds[0])) && is_numeric(trim($bounds[1])))
            {
                return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
            }
            return null;
        }
        return is_numeric(trim($raw)) ? intval(trim($raw)) : null;
    };

    // The per-call delay wins over the global setting, but only when it parses.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '')
    {
        $parsed = $parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    if ($request_delay != '')
    {
        $parsed = $parse_delay($request_delay);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));

    if ($has_delay)
    {
        // Drop the cached copy so the timestamp written by other requests is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false)
        {
            // Computed once: evaluating time() twice could yield a negative
            // value for usleep() when the clock ticks between the two reads.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0)
            {
                if ($detailed_logging)
                {
                    crawlomatic_log_to_file('Delay between requests set(8), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // $sleep_time is in microseconds; waits at or above this cap are skipped.
                if ($sleep_time < 21600000)
                {
                    usleep($sleep_time);
                }
            }
        }
    }

    // Empty values are handed to the script as the literal string 'default'.
    if ($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if ($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if ($user_pass == '')
    {
        $user_pass = 'default';
    }
    if ($timeout == '')
    {
        $timeout = 'default';
    }
    if ($scripter == '')
    {
        $scripter = 'default';
    }
    if ($local_storage == '')
    {
        $local_storage = 'default';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = '30000';
    }

    // Proxy argument: the literal 'null', or "host[~~~auth]" for a randomly
    // chosen entry of the comma separated proxy list.
    $proxy_arg = 'null';
    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
    {
        $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
        $randomness = array_rand($prx);
        $proxy_arg = trim($prx[$randomness]);
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
            if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
            {
                $proxy_arg .= '~~~' . trim($prx_auth[$randomness]);
            }
        }
    }

    // Shell quoting. On POSIX shells every argument goes through
    // escapeshellarg(), so URLs, cookies or scripts containing ", $, ` or \
    // can no longer break out of the command line; arguments reach the script
    // verbatim. On Windows escapeshellarg() replaces %, ! and " with spaces
    // (which would corrupt percent-encoded URLs), so the previous quoting is
    // kept there.
    $is_windows = (DIRECTORY_SEPARATOR === '\\');
    $quote_arg = function ($value, $legacy_filter = null) use ($is_windows) {
        $value = (string) $value;
        if ($is_windows)
        {
            return '"' . ($legacy_filter !== null ? call_user_func($legacy_filter, $value) : $value) . '"';
        }
        return escapeshellarg($value);
    };

    $puppeteer_comm = 'node ' . implode(' ', array(
        $quote_arg(dirname(__FILE__) . '/res/puppeteer/puppeteer.js'),
        $quote_arg($url),
        $quote_arg($proxy_arg),
        $quote_arg($custom_user_agent),
        $quote_arg($custom_cookies),
        $quote_arg($user_pass),
        $quote_arg($phantomjs_timeout),
        $quote_arg($timeout),
        $quote_arg($scripter, 'addslashes'),
        $quote_arg($local_storage, 'addslashes')
    )) . ' 2>&1';
    if ($detailed_logging)
    {
        crawlomatic_log_to_file('Puppeteer command: ' . $puppeteer_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($puppeteer_comm);

    // Every outcome below counts as "a request was made", so the throttle
    // timestamp is stored once here instead of on each exit path.
    if ($has_delay)
    {
        update_option('crawlomatic_last_time', time());
    }
    if ($cmdResult === NULL || $cmdResult == '')
    {
        crawlomatic_log_to_file('puppeteer did not return usable info for: ' . $url);
        return false;
    }
    if (trim($cmdResult) === 'timeout')
    {
        crawlomatic_log_to_file('puppeteer timed out while getting page: ' . $url. ' - please increase timeout in Main Settings');
        return false;
    }
    if (stristr($cmdResult, 'Error: Cannot find module \'puppeteer\'') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found on server: ' . $cmdResult);
        return false;
    }
    if (stristr($cmdResult, 'sh: node: command not found') !== false || stristr($cmdResult, 'throw err;') !== false)
    {
        crawlomatic_log_to_file('nodeJS not found, please install it on your server');
        return false;
    }
    if (stristr($cmdResult, 'sh: puppeteer: command not found') !== false)
    {
        crawlomatic_log_to_file('puppeteer not found, please install it on your server');
        return false;
    }
    if (stristr($cmdResult, 'process.on(\'unhandledRejection\', up => { throw up })') !== false)
    {
        crawlomatic_log_to_file('puppeteer failed to download resource: ' . $url . ' - error: ' . $cmdResult);
        return false;
    }
    if (stristr($cmdResult, 'Unhandled Rejection, reason: { TimeoutError') !== false)
    {
        crawlomatic_log_to_file('puppeteer failed to download resource: ' . $url . ' - timeout error: ' . $cmdResult);
        return false;
    }
    // A stack-trace style reference to the script itself means it crashed.
    if (stristr($cmdResult, 'res/puppeteer/puppeteer.js:') !== false)
    {
        crawlomatic_log_to_file('puppeteer failed to run, error: ' . $cmdResult);
        return false;
    }
    return $cmdResult;
}
/**
 * Downloads a page by running the bundled res/phantomjs/phantom.js script with a local PhantomJS binary.
 *
 * The binary is the 'phantom_path' plugin setting when set, otherwise `phantomjs`.
 * Requests are throttled: when a delay is configured (plugin setting
 * 'request_delay' or $request_delay), the function sleeps until that many
 * milliseconds have passed since the timestamp stored in the
 * 'crawlomatic_last_time' option, and stores a new timestamp after the run.
 *
 * @param string $url               Page to download (passed as the script's first argument).
 * @param string $custom_cookies    Passed to the script ('' becomes 'default').
 * @param string $custom_user_agent 'none' passes 'default'; '' picks one via crawlomatic_get_random_user_agent().
 * @param string $use_proxy         '1' adds --proxy (and --proxy-auth) from a random entry of the comma separated proxy settings.
 * @param string $user_pass         Passed to the script ('' becomes 'default').
 * @param string $phantom_wait      Passed to the script after $user_pass.
 * @param string $request_delay     Delay in milliseconds, or "min,max" for a random delay. Overrides the plugin setting when numeric.
 * @param string $scripter          Passed to the script ('' becomes 'default').
 * @param string $local_storage     Passed to the script ('' becomes 'default').
 * @return string|false Combined stdout/stderr of PhantomJS on success, false on failure.
 */
function crawlomatic_get_page_PhantomJS($url, $custom_cookies, $custom_user_agent, $use_proxy, $user_pass, $phantom_wait, $request_delay, $scripter, $local_storage)
{
    if (!function_exists('shell' . '_exec'))
    {
        crawlomatic_log_to_file('shell_' . 'exec not found, cannot run');
        return false;
    }
    // php.ini lists are often written "a, b, c"; trim so the match is reliable.
    $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
    if (in_array('shell' . '_exec', $disabled))
    {
        crawlomatic_log_to_file('shell' . '_exec disabled, cannot run');
        return false;
    }
    if ($custom_user_agent == 'none')
    {
        $custom_user_agent = '';
    }
    elseif ($custom_user_agent == '')
    {
        $custom_user_agent = crawlomatic_get_random_user_agent();
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $detailed_logging = isset($crawlomatic_Main_Settings['enable_detailed_logging']);

    // Turns "500" or "200,800" into an integer number of milliseconds;
    // returns null when the value is not numeric.
    $parse_delay = function ($raw) {
        $raw = (string) $raw;
        if (strpos($raw, ',') !== false)
        {
            $bounds = explode(',', $raw);
            if (is_numeric(trim($bounds[0])) && is_numeric(trim($bounds[1])))
            {
                return rand(intval(trim($bounds[0])), intval(trim($bounds[1])));
            }
            return null;
        }
        return is_numeric(trim($raw)) ? intval(trim($raw)) : null;
    };

    // The per-call delay wins over the global setting, but only when it parses.
    $delay = '';
    if (isset($crawlomatic_Main_Settings['request_delay']) && $crawlomatic_Main_Settings['request_delay'] != '')
    {
        $parsed = $parse_delay($crawlomatic_Main_Settings['request_delay']);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    if ($request_delay != '')
    {
        $parsed = $parse_delay($request_delay);
        if ($parsed !== null)
        {
            $delay = $parsed;
        }
    }
    $has_delay = ($delay != '' && is_numeric($delay));

    if ($has_delay)
    {
        // Drop the cached copy so the timestamp written by other requests is seen.
        $GLOBALS['wp_object_cache']->delete('crawlomatic_last_time', 'options');
        $last_time = get_option('crawlomatic_last_time', false);
        if ($last_time !== false)
        {
            // Computed once: evaluating time() twice could yield a negative
            // value for usleep() when the clock ticks between the two reads.
            $sleep_time = intval(((intval($last_time) - time()) * 1000 + $delay) * 1000);
            if ($sleep_time > 0)
            {
                if ($detailed_logging)
                {
                    crawlomatic_log_to_file('Delay between requests set(9), waiting ' . ($sleep_time/1000) . ' ms');
                }
                // $sleep_time is in microseconds; waits at or above this cap are skipped.
                if ($sleep_time < 21600000)
                {
                    usleep($sleep_time);
                }
            }
        }
    }

    if (isset($crawlomatic_Main_Settings['phantom_path']) && $crawlomatic_Main_Settings['phantom_path'] != '')
    {
        $phantomjs_comm = $crawlomatic_Main_Settings['phantom_path'];
    }
    else
    {
        $phantomjs_comm = 'phantomjs';
    }
    if (isset($crawlomatic_Main_Settings['phantom_timeout']) && $crawlomatic_Main_Settings['phantom_timeout'] != '')
    {
        $phantomjs_timeout = ((int)$crawlomatic_Main_Settings['phantom_timeout']);
    }
    else
    {
        $phantomjs_timeout = '30000';
    }

    // Empty values are handed to the script as the literal string 'default'.
    if ($custom_user_agent == '')
    {
        $custom_user_agent = 'default';
    }
    if ($custom_cookies == '')
    {
        $custom_cookies = 'default';
    }
    if ($user_pass == '')
    {
        $user_pass = 'default';
    }
    if ($scripter == '')
    {
        $scripter = 'default';
    }
    if ($local_storage == '')
    {
        $local_storage = 'default';
    }

    // Shell quoting. On POSIX shells every argument goes through
    // escapeshellarg(), so URLs, cookies or scripts containing ", $, ` or \
    // can no longer break out of the command line; arguments reach the script
    // verbatim. On Windows escapeshellarg() replaces %, ! and " with spaces
    // (which would corrupt percent-encoded URLs), so the previous quoting is
    // kept there.
    $is_windows = (DIRECTORY_SEPARATOR === '\\');
    $quote_arg = function ($value, $legacy_filter = null) use ($is_windows) {
        $value = (string) $value;
        if ($is_windows)
        {
            return '"' . ($legacy_filter !== null ? call_user_func($legacy_filter, $value) : $value) . '"';
        }
        return escapeshellarg($value);
    };

    if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
    {
        // One proxy is picked at random; the auth entry at the same index is used with it.
        $prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
        $randomness = array_rand($prx);
        $proxy_value = trim($prx[$randomness]);
        $phantomjs_comm .= ' --proxy=' . ($is_windows ? $proxy_value : escapeshellarg($proxy_value));
        if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
        {
            $prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
            if (isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
            {
                $auth_value = trim($prx_auth[$randomness]);
                $phantomjs_comm .= ' --proxy-auth=' . ($is_windows ? $auth_value : escapeshellarg($auth_value));
            }
        }
    }
    $phantomjs_comm .= ' --ignore-ssl-errors=true ';
    $phantomjs_comm .= implode(' ', array(
        $quote_arg(dirname(__FILE__) . '/res/phantomjs/phantom.js'),
        $quote_arg($url),
        $quote_arg($phantomjs_timeout, 'esc_html'),
        $quote_arg($custom_user_agent),
        $quote_arg($custom_cookies),
        $quote_arg($user_pass),
        $quote_arg($phantom_wait, 'esc_html'),
        $quote_arg($scripter, 'addslashes'),
        $quote_arg($local_storage, 'addslashes')
    ));
    $phantomjs_comm .= ' 2>&1';
    if ($detailed_logging)
    {
        crawlomatic_log_to_file('PhantomJS command: ' . $phantomjs_comm);
    }
    $shefunc = trim(' s ') . trim(' h ') . 'ell' . '_exec';
    $cmdResult = $shefunc($phantomjs_comm);

    // Every outcome below counts as "a request was made", so the throttle
    // timestamp is stored once here instead of on each exit path.
    if ($has_delay)
    {
        update_option('crawlomatic_last_time', time());
    }
    if ($cmdResult === NULL || $cmdResult == '')
    {
        crawlomatic_log_to_file('phantomjs did not return usable info for: ' . $url);
        return false;
    }
    if (trim($cmdResult) === 'timeout')
    {
        crawlomatic_log_to_file('phantomjs timed out while getting page: ' . $url. ' - please increase timeout in Main Settings');
        return false;
    }
    if (stristr($cmdResult, 'sh: phantomjs: command not found') !== false)
    {
        crawlomatic_log_to_file('phantomjs not found, please install it on your server');
        return false;
    }
    return $cmdResult;
}
// Run crawlomatic_run_cron on the 'wp_loaded' action with priority 0.
add_action('wp_loaded', 'crawlomatic_run_cron', 0);
/**
 * Runs the plugin's cron routine when the request carries the configured secret.
 *
 * Triggered by a `run_crawlomatic` query parameter whose value equals the
 * URL-encoded 'secret_word' plugin setting. When it matches, crawlomatic_cron()
 * is executed and the request ends.
 *
 * An empty or missing secret word never matches: otherwise any visitor could
 * start the cron with `?run_crawlomatic=` on a site that has not configured one.
 *
 * @return void
 */
function crawlomatic_run_cron()
{
    // Reject missing and array-valued (?run_crawlomatic[]=x) parameters.
    if (!isset($_GET['run_crawlomatic']) || !is_string($_GET['run_crawlomatic']))
    {
        return;
    }
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['secret_word']) || !is_scalar($crawlomatic_Main_Settings['secret_word']))
    {
        return;
    }
    $secret_word = (string) $crawlomatic_Main_Settings['secret_word'];
    if ($secret_word === '')
    {
        return;
    }
    // hash_equals() is a strict, constant-time comparison; the loose == used
    // before treated numeric strings such as "1e3" and "1000" as equal.
    if (hash_equals(urlencode($secret_word), $_GET['run_crawlomatic']))
    {
        crawlomatic_cron();
        die();
    }
}
// Register crawlomatic_activation_callback as the activation hook for this plugin file.
register_activation_hook(__FILE__, 'crawlomatic_activation_callback');
/**
 * Seeds the plugin options with their default values.
 *
 * 'crawlomatic_posts_per_page' and 'crawlomatic_Main_Settings' are each written
 * when the option is currently empty/missing or when $defaults is exactly TRUE.
 * add_option() is used only when $defaults is exactly FALSE; every other value
 * goes through update_option().
 *
 * NOTE(review): WordPress passes a $network_wide boolean to activation
 * callbacks, so a network-wide activation would presumably arrive here as
 * $defaults === TRUE and overwrite saved settings - confirm.
 *
 * @param mixed $defaults TRUE forces a reset to defaults, FALSE only fills in missing options.
 * @return void
 */
function crawlomatic_activation_callback($defaults = FALSE)
{
    $force_reset = ($defaults === TRUE);
    $create_only = ($defaults === FALSE);

    if (!get_option('crawlomatic_posts_per_page') || $force_reset) {
        if ($create_only) {
            add_option('crawlomatic_posts_per_page', '12');
        } else {
            update_option('crawlomatic_posts_per_page', '12');
        }
    }

    // Nothing more to do when settings already exist and no reset was requested.
    if (get_option('crawlomatic_Main_Settings') && !$force_reset) {
        return;
    }

    // Multi-line string values below contain literal newlines; their
    // continuation lines must stay unindented.
    $default_settings = [
        'crawlomatic_enabled' => 'on',
        'disable_excerpt' => '',
        'no_content_autodetect' => '',
        'max_auto_links' => '5',
        'def_user' => '1',
        'fix_html' => '',
        'alt_read' => '',
        'convert_cyrilic' => '',
        'add_canonical' => '',
        'strip_scripts' => '',
        'strip_html' => '',
        'screenshot_height' => '450',
        'screenshot_width' => '600',
        'enable_metabox' => 'on',
        'secret_word' => '',
        'skip_no_img' => '',
        'require_only_one' => '',
        'global_req_words' => '',
        'global_ban_words' => '',
        'skip_old' => '',
        'skip_year' => '',
        'phantom_path' => '',
        'phantom_timeout' => '',
        'phantom_screen' => '',
        'puppeteer_screen' => '',
        'disable_fallback' => '',
        'headless_screen' => '',
        'skip_month' => '',
        'skip_day' => '',
        'custom_html2' => '',
        'custom_html' => '',
        'sentence_list' => 'This is one %adjective %noun %sentence_ending
This is another %adjective %noun %sentence_ending
I %love_it %nouns , because they are %adjective %sentence_ending
My %family says this plugin is %adjective %sentence_ending
These %nouns are %adjective %sentence_ending',
        'sentence_list2' => 'Meet this %adjective %noun %sentence_ending
This is the %adjective %noun ever %sentence_ending
I %love_it %nouns , because they are the %adjective %sentence_ending
My %family says this plugin is very %adjective %sentence_ending
These %nouns are quite %adjective %sentence_ending',
        'variable_list' => 'adjective_very => %adjective;very %adjective;
adjective => clever;interesting;smart;huge;astonishing;unbelievable;nice;adorable;beautiful;elegant;fancy;glamorous;magnificent;helpful;awesome
noun_with_adjective => %noun;%adjective %noun
noun => plugin;WordPress plugin;item;ingredient;component;constituent;module;add-on;plug-in;addon;extension
nouns => plugins;WordPress plugins;items;ingredients;components;constituents;modules;add-ons;plug-ins;addons;extensions
love_it => love;adore;like;be mad for;be wild about;be nuts about;be crazy about
family => %adjective %family_members;%family_members
family_members => grandpa;brother;sister;mom;dad;grandma
sentence_ending => .;!;!!',
        'auto_clear_logs' => 'No',
        'enable_logging' => 'on',
        'enable_detailed_logging' => '',
        'rule_timeout' => '3600',
        'max_at_once' => '',
        'author_id' => '',
        'author_name' => '',
        'category_name' => '',
        'tag_name' => '',
        'post_id' => '',
        'post_name' => '',
        'page_id' => '',
        'pagename' => '',
        'post_parent' => '',
        'type_post' => '',
        'search_query' => '',
        'year' => '',
        'month' => '',
        'day' => '',
        'featured_image' => '',
        'request_timeout' => '60',
        'request_delay' => '',
        'strip_links' => '',
        'strip_content_links' => '',
        'strip_internal_content_links' => '',
        'email_address' => '',
        'email_summary' => '',
        'send_email' => '',
        'crawlomatic_timestamp' => '',
        'crawlomatic_post_img' => '',
        'crawlomatic_extra_tags' => '',
        'crawlomatic_extra_categories' => '',
        'crawlomatic_item_title' => '',
        'crawlomatic_comment_status' => '',
        'crawlomatic_enable_pingbacks' => '',
        'crawlomatic_post_date' => '',
        'send_post_email' => '',
        'best_password' => '',
        'only_imported' => '',
        'protected_terms' => '',
        'best_user' => '',
        'spin_text' => 'disabled',
        'wordai_uniqueness' => '',
        'enable_robots' => '',
        'max_word_content' => '',
        'min_word_content' => '',
        'max_word_title' => '',
        'min_word_title' => '',
        'crawlomatic_featured_image_checking' => '',
        'random_image_names' => '',
        'keep_srcset' => '',
        'remove_img_content' => 'on',
        'crawlomatic_clear_curl_charset' => '',
        'proxy_url' => '',
        'proxy_auth' => '',
        'search_google' => '',
        'post_source_custom' => '',
        'default_dl_ext' => '',
        'resize_width' => '',
        'resize_height' => '',
        'read_more_text' => 'Read More',
        'max_download' => '',
        'price_multiply' => '',
        'price_add' => '',
        'price_end' => '',
        'd_sep' => '',
        't_sep' => '',
        'no_local_image' => '',
        'url_image' => '',
        'auto_delete_enabled' => '',
        'run_after' => '',
        'run_before' => '',
        'disable_backend_content' => '',
        'no_valid_link' => '',
        'keep_filters' => '',
        'unchanged_urls' => '',
        'no_title_spin' => '',
        'confidence_level' => 'high',
        'tldr_max' => '',
        'tldr_min' => '',
        'copy_images' => '',
        'no_local_attach' => '',
        'no_local_dup' => '',
        'rule_delay' => '',
        'no_spin' => '',
        'replace_url' => '',
        'link_attributes_external' => '',
        'link_attributes_internal' => '',
        'multi_separator' => ',',
        'do_not_check_duplicates' => '',
        'cleanup_not_printable' => '',
        'publish_delay' => '',
        'title_duplicates' => '',
        'no_dup_titles' => '',
        'draft_first' => '',
        'do_not_crawl_duplicates' => 'on',
        'randomize_order' => '',
        'link_source' => '',
        'shortest_api' => '',
        'update_existing' => '',
        'no_up_img' => '',
        'up_publish_date' => '',
        'iframe_resize_height' => '',
        'iframe_resize_width' => '',
        'skip_image_names' => '',
        'cat_separator' => ',',
        'no_check' => '',
        'deepl_auth' => '',
        'deppl_free' => '',
        'bing_auth' => '',
        'textrazor_key' => '',
        'bing_region' => '',
        'google_trans_auth' => '',
        'google_search_api' => '',
        'auto_update_posts' => 'No',
        'update_actions' => '',
        'google_search_cx' => '',
        'headlessbrowserapi_key' => '',
        'flickr_order' => 'date-posted-desc',
        'flickr_license' => '-1',
        'flickr_api' => '',
        'scrapeimg_height' => '',
        'attr_text' => 'Photo Credit: <a href="%%image_source_url%%" target="_blank">%%image_source_name%%</a>',
        'scrapeimg_width' => '',
        'scrapeimg_cat' => 'all',
        'scrapeimg_order' => 'any',
        'scrapeimg_orientation' => 'all',
        'imgtype' => 'all',
        'pixabay_api' => '',
        'pexels_api' => '',
        'morguefile_secret' => '',
        'morguefile_api' => '',
        'bimage' => 'on',
        'no_orig' => '',
        'img_order' => 'popular',
        'img_cat' => 'all',
        'img_width' => '',
        'img_mwidth' => '',
        'img_ss' => '',
        'img_editor' => '',
        'img_language' => 'any',
        'pixabay_scrape' => '',
        'unsplash_api' => '',
        'google_images' => '',
        'scrapeimgtype' => 'all'
    ];

    if ($create_only) {
        add_option('crawlomatic_Main_Settings', $default_settings);
    } else {
        update_option('crawlomatic_Main_Settings', $default_settings);
    }
}
/**
 * Fetches an image URL from one of the enabled free-image providers.
 *
 * Providers are enabled through $crawlomatic_Main_Settings and tried in random
 * order until one returns something other than false or ''. The
 * %%image_source_name%%, %%image_source_url%% and %%image_source_website%%
 * placeholders in $img_attr are filled from the provider that succeeded; any
 * placeholders still present at the end are replaced with ''.
 *
 * @param array  $crawlomatic_Main_Settings Plugin settings.
 * @param string $query_words               Search terms handed to the provider.
 * @param string $img_attr                  Attribution template, modified in place.
 * @param int    $res_cnt                   Result count handed to providers that accept one.
 * @return mixed The last provider result; false when no provider was enabled or none succeeded with a value.
 */
function crawlomatic_get_free_image($crawlomatic_Main_Settings, $query_words, &$img_attr, $res_cnt = 3)
{
    $original_url = '';
    $providers = array();
    if (isset($crawlomatic_Main_Settings['pixabay_api']) && $crawlomatic_Main_Settings['pixabay_api'] != '') {
        $providers[] = 'pixabay';
    }
    if (isset($crawlomatic_Main_Settings['flickr_api']) && $crawlomatic_Main_Settings['flickr_api'] !== '') {
        $providers[] = 'flickr';
    }
    if (isset($crawlomatic_Main_Settings['pexels_api']) && $crawlomatic_Main_Settings['pexels_api'] !== '') {
        $providers[] = 'pexels';
    }
    if (isset($crawlomatic_Main_Settings['pixabay_scrape']) && $crawlomatic_Main_Settings['pixabay_scrape'] == 'on') {
        $providers[] = 'pixabayscrape';
    }
    if (isset($crawlomatic_Main_Settings['unsplash_api']) && $crawlomatic_Main_Settings['unsplash_api'] == 'on') {
        $providers[] = 'unsplash';
    }
    if (isset($crawlomatic_Main_Settings['google_images']) && $crawlomatic_Main_Settings['google_images'] == 'on') {
        $providers[] = 'google';
    }

    $rez = false;
    while (($rez === false || $rez === '') && count($providers) > 0) {
        // Draw one provider at random and remove it so it is tried only once.
        $slot = array_rand($providers);
        $provider = $providers[$slot];
        unset($providers[$slot]);

        $credit_name = '';
        $credit_site = '';
        // When null, the credit URL is whatever the provider wrote into $original_url.
        $fixed_credit_url = null;

        switch ($provider) {
            case 'pixabay':
                $img_ss = (isset($crawlomatic_Main_Settings['img_ss']) && $crawlomatic_Main_Settings['img_ss'] == 'on') ? '1' : '0';
                $img_editor = (isset($crawlomatic_Main_Settings['img_editor']) && $crawlomatic_Main_Settings['img_editor'] == 'on') ? '1' : '0';
                $rez = crawlomatic_get_pixabay_image($crawlomatic_Main_Settings['pixabay_api'], $query_words, $crawlomatic_Main_Settings['img_language'], $crawlomatic_Main_Settings['imgtype'], $crawlomatic_Main_Settings['scrapeimg_orientation'], $crawlomatic_Main_Settings['img_order'], $crawlomatic_Main_Settings['img_cat'], $crawlomatic_Main_Settings['img_mwidth'], $crawlomatic_Main_Settings['img_width'], $img_ss, $img_editor, $original_url, $res_cnt);
                $credit_name = 'Pixabay';
                $credit_site = 'https://pixabay.com/';
                break;
            case 'morguefile':
                $rez = crawlomatic_get_morguefile_image($crawlomatic_Main_Settings['morguefile_api'], $crawlomatic_Main_Settings['morguefile_secret'], $query_words, $original_url);
                $credit_name = 'MorgueFile';
                $credit_site = 'https://morguefile.com/';
                $fixed_credit_url = 'https://morguefile.com/';
                break;
            case 'flickr':
                $rez = crawlomatic_get_flickr_image($crawlomatic_Main_Settings, $query_words, $original_url, $res_cnt);
                $credit_name = 'Flickr';
                $credit_site = 'https://www.flickr.com/';
                break;
            case 'pexels':
                $rez = crawlomatic_get_pexels_image($crawlomatic_Main_Settings, $query_words, $original_url, $res_cnt);
                $credit_name = 'Pexels';
                $credit_site = 'https://www.pexels.com/';
                break;
            case 'pixabayscrape':
                $rez = crawlomatic_scrape_pixabay_image($crawlomatic_Main_Settings, $query_words, $original_url);
                $credit_name = 'Pixabay';
                $credit_site = 'https://pixabay.com/';
                break;
            case 'unsplash':
                $rez = crawlomatic_scrape_unsplash_image($query_words, $original_url);
                $credit_name = 'Unsplash';
                $credit_site = 'https://unsplash.com/';
                break;
            case 'google':
                $original_url = 'https://google.com/';
                $rez = crawlomatic_get_random_image_google($query_words);
                $credit_name = 'Google Images';
                $credit_site = 'https://google.com/';
                break;
            default:
                crawlomatic_log_to_file('Unrecognized free file source: ' . $provider);
                continue 2;
        }

        if ($rez !== false && $rez !== '') {
            $credit_url = ($fixed_credit_url !== null) ? $fixed_credit_url : $original_url;
            $img_attr = str_replace('%%image_source_name%%', $credit_name, $img_attr);
            $img_attr = str_replace('%%image_source_url%%', $credit_url, $img_attr);
            $img_attr = str_replace('%%image_source_website%%', $credit_site, $img_attr);
        }
    }

    // Blank out any placeholder no provider filled in.
    foreach (array('%%image_source_name%%', '%%image_source_url%%', '%%image_source_website%%') as $placeholder) {
        $img_attr = str_replace($placeholder, '', $img_attr);
    }
    return $rez;
}
/**
 * Follows a URL's redirect chain and returns every redirect target in order.
 *
 * Each hop is resolved with crawlomatic_get_redirect_url(). The walk stops when
 * a hop yields a falsy value or a target that was already collected.
 *
 * @param string $url Starting URL.
 * @return array Redirect targets in the order they were encountered (may be empty).
 */
function crawlomatic_get_all_redirects($url)
{
    $chain = array();
    for (;;) {
        $next = crawlomatic_get_redirect_url($url);
        if (!$next) {
            break;
        }
        if (in_array($next, $chain)) {
            // Target already seen: redirect loop.
            break;
        }
        $chain[] = $next;
        $url = $next;
    }
    return $chain;
}
/**
 * Resolves a URL to the last target of its redirect chain.
 *
 * URLs containing 'localhost' are returned untouched without any lookup.
 *
 * @param string $url URL to resolve.
 * @return string The final redirect target, or $url itself when there are no redirects.
 */
function crawlomatic_get_final_url($url)
{
    if (strpos($url, 'localhost') !== false) {
        return $url;
    }
    $chain = crawlomatic_get_all_redirects($url);
    if (count($chain) === 0) {
        return $url;
    }
    return end($chain);
}
/**
 * Resolves a random Unsplash image URL for a query via source.unsplash.com.
 *
 * The image URL is read from the first Location header of the response headers.
 * A Location containing 'source-404' is treated as "no image found".
 *
 * Changes from the previous version: a failed header request now returns false
 * instead of iterating over a non-array, the error_reporting level and
 * default_socket_timeout are restored to their prior values instead of being
 * overwritten, and the Location header is matched case-insensitively.
 *
 * @param string $query        Search terms; an empty value requests a random image.
 * @param string $original_url Set to 'https://unsplash.com/' and, on success, to the image URL.
 * @return string|false Image URL, or false on failure / no image.
 */
function crawlomatic_scrape_unsplash_image($query, &$original_url)
{
    $original_url = 'https://unsplash.com/';
    $feed_uri = 'https://source.unsplash.com/1600x900/';
    if ($query != '') {
        $feed_uri .= '?' . urlencode($query);
    }

    // Silence warnings from get_headers() only for the duration of the call.
    $previous_reporting = error_reporting(0);
    $previous_timeout = ini_set('default_socket_timeout', 120);
    $headers = get_headers($feed_uri);
    if ($previous_timeout !== false) {
        ini_set('default_socket_timeout', $previous_timeout);
    }
    error_reporting($previous_reporting);

    if ($headers === FALSE || !is_array($headers)) {
        crawlomatic_log_to_file('Error while getting api url: ' . $feed_uri);
        return false;
    }

    $not_found = false;
    $location = false;
    foreach ($headers as $header_line) {
        if (!is_string($header_line) || stripos($header_line, 'Location:') === false) {
            continue;
        }
        if (strstr($header_line, 'source-404') !== false) {
            $not_found = true;
        }
        $location = $header_line;
        $stripped = preg_replace('/^Location:\s*/i', '', $header_line);
        if ($stripped !== null) {
            $location = $stripped;
        }
        // Only the first Location header is considered.
        break;
    }

    if ($not_found) {
        crawlomatic_log_to_file('NO image found on Unsplash for query: ' . $query);
        return false;
    }
    if ($location == false) {
        crawlomatic_log_to_file('Failed to parse response: ' . $feed_uri);
        return false;
    }
    $original_url = $location;
    return $location;
}
/**
 * Builds a random alphanumeric string using rand().
 *
 * @param int $length Number of characters to generate.
 * @return string String made of characters from 0-9, a-z and A-Z.
 */
function crawlomatic_generateRandomString($length = 10)
{
    $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $last_index = strlen($alphabet) - 1;
    $pieces = array();
    for ($n = 0; $n < $length; ++$n) {
        $pieces[] = $alphabet[rand(0, $last_index)];
    }
    return implode('', $pieces);
}
/**
 * Sends a HEAD request for a URL and returns the target of its Location header.
 *
 * Changes from the previous version:
 *  - request lines end in CRLF rather than PHP_EOL (a bare LF on most servers);
 *  - https URLs are probed over TLS on port 443 when the 'ssl' stream transport
 *    is available, instead of always speaking plaintext to port 80;
 *  - a missing scheme no longer triggers an undefined-index notice;
 *  - the Location header name is matched case-insensitively;
 *  - relative targets keep an explicit port, and protocol-relative ('//host/..')
 *    and path-relative targets are resolved against the requested URL.
 *
 * @param string $url URL to probe.
 * @return string|false Absolute redirect target, or false when the URL has no
 *                      host, the connection fails or there is no Location header.
 */
function crawlomatic_get_redirect_url($url)
{
    $url_parts = parse_url($url);
    if (!$url_parts || !isset($url_parts['host'])) {
        return false;
    }
    $host = $url_parts['host'];
    $scheme = isset($url_parts['scheme']) ? strtolower($url_parts['scheme']) : 'http';
    $path = isset($url_parts['path']) ? $url_parts['path'] : '/';
    $request_target = $path . (isset($url_parts['query']) ? '?' . $url_parts['query'] : '');

    // Fall back to the old plaintext behaviour when TLS sockets are unavailable.
    $use_tls = ($scheme === 'https' && in_array('ssl', stream_get_transports(), true));
    if (isset($url_parts['port'])) {
        $port = (int)$url_parts['port'];
    } else {
        $port = $use_tls ? 443 : 80;
    }

    $sock = fsockopen(($use_tls ? 'ssl://' : '') . $host, $port, $errno, $errstr, 30);
    if (!$sock) {
        return false;
    }
    $host_header = $host . (isset($url_parts['port']) ? ':' . $port : '');
    $request = 'HEAD ' . $request_target . " HTTP/1.1\r\n";
    $request .= 'Host: ' . $host_header . "\r\n";
    $request .= "Connection: Close\r\n\r\n";
    fwrite($sock, $request);
    $response = '';
    while (!feof($sock)) {
        $chunk = fread($sock, 8192);
        if ($chunk === false) {
            break;
        }
        $response .= $chunk;
    }
    fclose($sock);

    if (!preg_match('/^Location:[ \t]*(.+?)\s*$/mi', $response, $matches)) {
        return false;
    }
    $location = trim($matches[1]);
    if ($location === '') {
        return false;
    }
    if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) {
        // Already absolute.
        return $location;
    }
    if (substr($location, 0, 2) === '//') {
        // Protocol-relative: inherit the scheme of the requested URL.
        return $scheme . ':' . $location;
    }
    $authority = $scheme . '://' . $host_header;
    if (substr($location, 0, 1) === '/') {
        return $authority . $location;
    }
    // Path-relative: resolve against the directory of the requested path.
    $last_slash = strrpos($path, '/');
    $directory = ($last_slash === false) ? '/' : substr($path, 0, $last_slash + 1);
    return $authority . $directory . $location;
}
/**
 * Queries the Pixabay API and returns the URL of one randomly chosen hit.
 *
 * Changes from the previous version: a response that contains "hits" but does
 * not decode to an object with a 'hits' array is logged and reported as false
 * (it previously caused a property read on null and count() on a non-array),
 * and query parameter values are URL-encoded.
 *
 * @param string $app_id         Pixabay API key.
 * @param string $query          Search terms ('' for none).
 * @param string $lang           Language code; '' or 'any' omits the filter.
 * @param string $image_type     Value for image_type ('' omits it).
 * @param string $orientation    Value for orientation ('' omits it).
 * @param string $order          Value for order ('' omits it).
 * @param string $image_category Value for category ('' omits it).
 * @param string $max_width      Value for max_width ('' omits it).
 * @param string $min_width      Value for min_width ('' omits it).
 * @param string $safe_search    '1' adds safesearch=true.
 * @param string $editors_choice '1' adds editors_choice=true.
 * @param string $original_url   Set to 'https://pixabay.com' and, on success, to the hit's page URL.
 * @param int    $get_max        Value for per_page.
 * @return string|false Image URL, '' when there are no hits, false on error.
 */
function crawlomatic_get_pixabay_image($app_id, $query, $lang, $image_type, $orientation, $order, $image_category, $max_width, $min_width, $safe_search, $editors_choice, &$original_url, $get_max = 3)
{
    $original_url = 'https://pixabay.com';
    $featured_image = '';

    $feed_uri = 'https://pixabay.com/api/?key=' . urlencode((string)$app_id);
    if ($query != '') {
        $feed_uri .= '&q=' . urlencode($query);
    }
    $feed_uri .= '&per_page=' . urlencode((string)$get_max);
    if ($lang != '' && $lang != 'any') {
        $feed_uri .= '&lang=' . urlencode((string)$lang);
    }
    // Optional filters: appended only when a value was supplied.
    $optional_filters = array(
        'image_type' => $image_type,
        'orientation' => $orientation,
        'order' => $order,
        'category' => $image_category,
        'max_width' => $max_width,
        'min_width' => $min_width,
    );
    foreach ($optional_filters as $filter_name => $filter_value) {
        if ($filter_value != '') {
            $feed_uri .= '&' . $filter_name . '=' . urlencode((string)$filter_value);
        }
    }
    if ($safe_search == '1') {
        $feed_uri .= '&safesearch=true';
    }
    if ($editors_choice == '1') {
        $feed_uri .= '&editors_choice=true';
    }

    $exec = crawlomatic_get_web_page($feed_uri, '', '', '0', '', '', '', '');
    if ($exec === FALSE) {
        crawlomatic_log_to_file('Error while getting api url: ' . $feed_uri);
        return false;
    }
    if (stristr($exec, '"hits"') === FALSE) {
        crawlomatic_log_to_file('Unknow response from api: ' . $feed_uri . ' - resp: ' . $exec);
        return false;
    }

    // Strip a leading alphanumeric prefix and surrounding parentheses
    // (a JSONP-style wrapper) before decoding.
    $unwrapped = preg_replace('#^[a-zA-Z0-9]*#', '', $exec);
    if ($unwrapped === null) {
        $unwrapped = $exec;
    }
    $json = json_decode(trim($unwrapped, '()'));
    if (!is_object($json) || !isset($json->hits) || !is_array($json->hits)) {
        crawlomatic_log_to_file('Unknow response from api: ' . $feed_uri . ' - resp: ' . $exec);
        return false;
    }
    if (count($json->hits) === 0) {
        return $featured_image;
    }

    $item = $json->hits[array_rand($json->hits)];
    if (is_object($item) && isset($item->webformatURL)) {
        $featured_image = $item->webformatURL;
        if (isset($item->pageURL)) {
            $original_url = $item->pageURL;
        }
    }
    return $featured_image;
}
/**
 * Summarizes content through the "TLDR This" RapidAPI endpoint.
 *
 * The API key is read from the 'best_password' setting; minimum and maximum
 * summary lengths come from 'tldr_min' / 'tldr_max' (defaults 100 / 300).
 * All tags except <br> are stripped, <br> becomes a newline and double quotes
 * become single quotes before the text is sent.
 *
 * Changes from the previous version: a failed transfer now logs the cURL error
 * and closes the handle (it used to return before either happened), and a
 * payload that cannot be JSON-encoded is reported instead of being posted.
 *
 * @param string $title   Post title, returned unchanged.
 * @param string $content Content to summarize.
 * @return array|false array($title, $summary_html) on success, false on failure.
 */
function crawlomatic_summarize_content($title, $content)
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!isset($crawlomatic_Main_Settings['best_password']) || trim($crawlomatic_Main_Settings['best_password']) == '') {
        crawlomatic_log_to_file('Please insert a valid "TLDR" API key.');
        return FALSE;
    }

    $content = strip_tags($content, '<br>');
    $content = preg_replace('#<br\s*/?>#i', "\n", $content);

    $tmax = 300;
    if (isset($crawlomatic_Main_Settings['tldr_max']) && trim($crawlomatic_Main_Settings['tldr_max']) != '') {
        $tmax = intval($crawlomatic_Main_Settings['tldr_max']);
    }
    $tmin = 100;
    if (isset($crawlomatic_Main_Settings['tldr_min']) && trim($crawlomatic_Main_Settings['tldr_min']) != '') {
        $tmin = intval($crawlomatic_Main_Settings['tldr_min']);
    }

    $payload = json_encode(array(
        'min_length' => $tmin,
        'max_length' => $tmax,
        'text' => str_replace('"', '\'', (string)$content),
    ));
    if ($payload === false) {
        crawlomatic_log_to_file('Failed to encode summarization request: ' . json_last_error_msg());
        return false;
    }

    $curl = curl_init();
    if ($curl === false) {
        return false;
    }
    curl_setopt_array($curl, [
        CURLOPT_URL => "https://tldrthis.p.rapidapi.com/v1/model/abstractive/summarize-text/",
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            "content-type: application/json",
            "x-rapidapi-host: tldrthis.p.rapidapi.com",
            "x-rapidapi-key: " . trim($crawlomatic_Main_Settings['best_password'])
        ],
    ]);
    $response = curl_exec($curl);
    // Read the error before closing: it is only available while the handle is open.
    $err = curl_error($curl);
    curl_close($curl);

    if ($response === false || $err) {
        crawlomatic_log_to_file( "cURL Error #:" . $err);
        return false;
    }
    $zali = json_decode($response);
    if ($zali == false) {
        crawlomatic_log_to_file( "Failed to decode response: " . $response);
        return false;
    }
    if (!isset($zali->summary)) {
        crawlomatic_log_to_file( "Failed to understand response: " . $response);
        return false;
    }
    return array($title, nl2br($zali->summary));
}
/**
 * Scrapes the Pixabay photo search page and returns one random image URL.
 *
 * Changes from the previous version:
 *  - the file-extension alternation is grouped ('\.(?:jpg|png)'); the old
 *    pattern '\.jpg|png' only matched .png when the whole attribute was "png";
 *  - the query string starts with '?' even when $query is empty, and values
 *    are URL-encoded;
 *  - the chosen image keeps its page link: the old shuffle() renumbered the
 *    href keys, so $original_url always fell back to the site root;
 *  - missing settings keys no longer raise undefined-index notices.
 *
 * @param array  $crawlomatic_Main_Settings Plugin settings (scrapeimg* keys are read).
 * @param string $query                     Search terms ('' for none).
 * @param string $original_url              Set to the image's page URL, or 'https://pixabay.com'.
 * @return string|false Image URL, '' when nothing matched, false when the page could not be fetched.
 */
function crawlomatic_scrape_pixabay_image($crawlomatic_Main_Settings, $query, &$original_url)
{
    $original_url = 'https://pixabay.com';
    $featured_image = '';

    $setting = function ($key) use ($crawlomatic_Main_Settings) {
        return isset($crawlomatic_Main_Settings[$key]) ? (string)$crawlomatic_Main_Settings[$key] : '';
    };

    $params = array();
    if ($query != '') {
        $params[] = 'q=' . urlencode($query);
    }
    $image_type = $setting('scrapeimgtype');
    if ($image_type !== '' && $image_type !== 'all') {
        $params[] = 'image_type=' . urlencode($image_type);
    }
    if ($setting('scrapeimg_orientation') !== '') {
        $params[] = 'orientation=' . urlencode($setting('scrapeimg_orientation'));
    }
    $order = $setting('scrapeimg_order');
    if ($order !== '' && $order !== 'any') {
        $params[] = 'order=' . urlencode($order);
    }
    if ($setting('scrapeimg_cat') !== '') {
        $params[] = 'category=' . urlencode($setting('scrapeimg_cat'));
    }
    if ($setting('scrapeimg_height') !== '') {
        $params[] = 'min_height=' . urlencode($setting('scrapeimg_height'));
    }
    if ($setting('scrapeimg_width') !== '') {
        $params[] = 'min_width=' . urlencode($setting('scrapeimg_width'));
    }
    $feed_uri = 'https://pixabay.com/en/photos/';
    if (count($params) > 0) {
        $feed_uri .= '?' . implode('&', $params);
    }

    $exec = crawlomatic_get_web_page($feed_uri, '', '', '0', '', '', '', '');
    if ($exec === FALSE) {
        crawlomatic_log_to_file('Error while getting api url: ' . $feed_uri);
        return false;
    }

    // Group 1: link href, group 2: image URL taken from data-lazy or src.
    $found = preg_match_all('/<a href="([^"]+?)".+?(?:data-lazy|src)="([^"]+?\.(?:jpg|png))"/i', $exec, $matches);
    if (!$found || empty($matches[2])) {
        return $featured_image;
    }

    $pick = array_rand($matches[2]);
    $featured_image = $matches[2][$pick];
    $page_link = $matches[1][$pick];
    if (substr($page_link, 0, 4) !== "http") {
        $page_link = 'https://pixabay.com' . $page_link;
    }
    $original_url = $page_link;
    return $featured_image;
}
/**
 * Picks a random image from a MorgueFile search for the first word of $query.
 *
 * Changes from the previous version: a response without a non-empty 'doc'
 * array now returns false instead of failing inside array_rand(), and the
 * unreachable trailing return was removed.
 *
 * @param string $app_id       MorgueFile API id.
 * @param string $app_secret   MorgueFile API secret.
 * @param string $query        Search terms; only the first space-separated word is used.
 * @param string $original_url Not modified by this function.
 * @return string|false The chosen entry's file_path_large, or false on failure.
 */
function crawlomatic_get_morguefile_image($app_id, $app_secret, $query, &$original_url)
{
    if (!class_exists('crawlomatic_morguefile')) {
        require_once (dirname(__FILE__) . "/res/morguefile/mf.api.class.php");
    }
    $words = explode(' ', $query);
    $first_word = $words[0];

    $mf = new crawlomatic_morguefile($app_id, $app_secret);
    $rez = $mf->call('/images/search/sort/page/' . $first_word);
    if ($rez === FALSE) {
        crawlomatic_log_to_file('Error while getting api response from morguefile.');
        return false;
    }
    // array_rand() needs a non-empty array.
    if (!is_object($rez) || !isset($rez->doc) || !is_array($rez->doc) || count($rez->doc) === 0) {
        return false;
    }
    $chosen_one = $rez->doc[array_rand($rez->doc)];
    if (isset($chosen_one->file_path_large)) {
        return $chosen_one->file_path_large;
    }
    return false;
}
/**
 * Searches Flickr and returns the URL of one randomly chosen photo.
 *
 * Photos are shuffled; for the first photo exposing a size URL, the largest
 * available size is used (url_o down to url_sq).
 *
 * Changes from the previous version: the remote php_serial payload is
 * unserialized with 'allowed_classes' => false so it cannot instantiate
 * objects, a failed transfer is logged with its cURL error, and a payload that
 * does not decode to the expected array is rejected.
 *
 * @param array  $crawlomatic_Main_Settings Plugin settings (flickr_api, flickr_license, flickr_order).
 * @param string $query                     Search text.
 * @param string $original_url              Set to the photo page URL, or 'https://www.flickr.com'.
 * @param int    $max                       Value for per_page.
 * @return string|false Image URL, '' when nothing usable was found, false on error.
 */
function crawlomatic_get_flickr_image($crawlomatic_Main_Settings, $query, &$original_url, $max)
{
    $original_url = 'https://www.flickr.com';
    $featured_image = '';
    $feed_uri = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=' . $crawlomatic_Main_Settings['flickr_api'] . '&media=photos&per_page=' . esc_html($max) . '&format=php_serial&text=' . urlencode($query);
    if (isset($crawlomatic_Main_Settings['flickr_license']) && $crawlomatic_Main_Settings['flickr_license'] != '-1') {
        $feed_uri .= '&license=' . $crawlomatic_Main_Settings['flickr_license'];
    }
    if (isset($crawlomatic_Main_Settings['flickr_order']) && $crawlomatic_Main_Settings['flickr_order'] != '') {
        $feed_uri .= '&sort=' . $crawlomatic_Main_Settings['flickr_order'];
    }
    $feed_uri .= '&extras=description,license,date_upload,date_taken,owner_name,icon_server,original_format,last_update,geo,tags,machine_tags,o_dims,views,media,path_alias,url_sq,url_t,url_s,url_q,url_m,url_n,url_z,url_c,url_l,url_o';

    $ch = curl_init();
    if ($ch === FALSE) {
        crawlomatic_log_to_file('Failed to init curl for flickr!');
        return false;
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Referer: https://www.flickr.com/'));
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    curl_setopt($ch, CURLOPT_URL, $feed_uri);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $exec = curl_exec($ch);
    $curl_error = ($exec === false) ? curl_error($ch) : '';
    curl_close($ch);

    if (!is_string($exec)) {
        crawlomatic_log_to_file('Unrecognized Flickr API response: ' . $curl_error . ' URI: ' . $feed_uri);
        return false;
    }
    if (stristr($exec, 'photos') === FALSE) {
        crawlomatic_log_to_file('Unrecognized Flickr API response: ' . $exec . ' URI: ' . $feed_uri);
        return false;
    }
    // Remote data: never allow it to create objects while unserializing.
    $items = unserialize($exec, array('allowed_classes' => false));
    if (!is_array($items) || !isset($items['photos']['photo']) || !is_array($items['photos']['photo'])) {
        crawlomatic_log_to_file('Failed to find photo node in response: ' . $exec . ' URI: ' . $feed_uri);
        return false;
    }
    $photos = $items['photos']['photo'];
    if (count($photos) == 0) {
        return $featured_image;
    }

    // Size keys, largest first.
    $size_keys = array('url_o', 'url_l', 'url_c', 'url_z', 'url_n', 'url_m', 'url_q', 'url_s', 'url_t', 'url_sq');
    shuffle($photos);
    foreach ($photos as $item) {
        foreach ($size_keys as $size_key) {
            if (isset($item[$size_key])) {
                $featured_image = $item[$size_key];
                break;
            }
        }
        if ($featured_image != '') {
            $original_url = esc_url('https://www.flickr.com/photos/' . $item['owner'] . '/' . $item['id']);
            break;
        }
    }
    return $featured_image;
}
/**
 * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
 *
 * An empty needle always matches.
 *
 * @param string $haystack String to inspect.
 * @param string $needle   Suffix to look for.
 * @return bool
 */
function crawlomatic_endsWith($haystack, $needle)
{
    $needle_length = strlen($needle);
    return $needle_length == 0 || substr($haystack, -$needle_length) === $needle;
}
/**
 * Searches Pexels and returns the URL of one randomly chosen photo.
 *
 * Photos are shuffled; for the first photo exposing a source URL, sizes are
 * preferred in this order: large, medium, small, portrait, landscape,
 * original, tiny.
 *
 * Changes from the previous version: the cURL-init log message named Flickr
 * instead of Pexels, a failed transfer is now logged with its cURL error, a
 * non-array 'photos' node is rejected before count(), and a missing 'url'
 * field no longer raises an undefined-index notice.
 *
 * @param array  $crawlomatic_Main_Settings Plugin settings ('pexels_api' is sent as the Authorization header).
 * @param string $query                     Search text.
 * @param string $original_url              Set to the photo page URL, or 'https://pexels.com'.
 * @param int    $max                       Value for per_page.
 * @return string|false Image URL, '' when nothing usable was found, false on error.
 */
function crawlomatic_get_pexels_image($crawlomatic_Main_Settings, $query, &$original_url, $max)
{
    $original_url = 'https://pexels.com';
    $featured_image = '';
    $feed_uri = 'https://api.pexels.com/v1/search?query=' . urlencode($query) . '&per_page=' . $max;

    $ch = curl_init();
    if ($ch === FALSE) {
        crawlomatic_log_to_file('Failed to init curl for Pexels!');
        return false;
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Authorization: ' . $crawlomatic_Main_Settings['pexels_api']));
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    curl_setopt($ch, CURLOPT_URL, $feed_uri);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $exec = curl_exec($ch);
    $curl_error = ($exec === false) ? curl_error($ch) : '';
    curl_close($ch);

    if (!is_string($exec)) {
        crawlomatic_log_to_file('Unrecognized Pexels API response: ' . $curl_error . ' URI: ' . $feed_uri);
        return false;
    }
    if (stristr($exec, 'photos') === FALSE) {
        crawlomatic_log_to_file('Unrecognized Pexels API response: ' . $exec . ' URI: ' . $feed_uri);
        return false;
    }
    $items = json_decode($exec, true);
    if (!is_array($items) || !isset($items['photos']) || !is_array($items['photos'])) {
        crawlomatic_log_to_file('Failed to find photo node in Pexels response: ' . $exec . ' URI: ' . $feed_uri);
        return false;
    }
    $photos = $items['photos'];
    if (count($photos) == 0) {
        return $featured_image;
    }

    // Source sizes in order of preference.
    $size_keys = array('large', 'medium', 'small', 'portrait', 'landscape', 'original', 'tiny');
    shuffle($photos);
    foreach ($photos as $item) {
        foreach ($size_keys as $size_key) {
            if (isset($item['src'][$size_key])) {
                $featured_image = $item['src'][$size_key];
                break;
            }
        }
        if ($featured_image != '') {
            if (isset($item['url'])) {
                $original_url = $item['url'];
            }
            break;
        }
    }
    return $featured_image;
}
/**
 * Shortens a URL through the shorte.st API, falling back to the original URL.
 *
 * Changes from the previous version: the URL is form-encoded before being put
 * into the request body (an unencoded '&' or '+' in $href corrupted the
 * 'urlToShorten' field), TLS certificate and host verification are no longer
 * switched off while the API token is sent, and a failed transfer is handled
 * before the response is decoded.
 *
 * @param string $href    URL to shorten.
 * @param string $api_key shorte.st public API token.
 * @return string The shortened URL, or $href unchanged when shortening fails.
 */
function crawlomatic_url_handle($href, $api_key)
{
    $ch = curl_init();
    if ($ch === false) {
        return $href;
    }
    curl_setopt($ch, CURLOPT_URL, "https://api.shorte.st/v1/data/url");
    curl_setopt($ch, CURLOPT_POSTFIELDS, "urlToShorten=" . urlencode(trim($href)));
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "PUT");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'public-api-token: ' . $api_key,
        'Content-Type: application/x-www-form-urlencoded'
    ]);
    // Keep certificate checks on: the request carries the API token.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $raw_response = curl_exec($ch);
    curl_close($ch);
    if (!is_string($raw_response)) {
        return $href;
    }
    $serverOutput = json_decode($raw_response, true);
    if (!is_array($serverOutput) || !isset($serverOutput['shortenedUrl']) || $serverOutput['shortenedUrl'] == '') {
        return $href;
    }
    return esc_url($serverOutput['shortenedUrl']);
}
/**
 * Spins a title and content with the bundled PhpTextSpinner while shielding markup.
 *
 * Title and content are joined around the '[19459000]' separator. HTML tags,
 * HTML comments, bracketed [..] fragments and double quotes are swapped for
 * numbered placeholders ('[19459001]', '[19459002]', ...) before spinning and
 * swapped back afterwards. The result is then split on the separator again.
 *
 * @param string $title   Title to spin.
 * @param string $content Content to spin.
 * @param bool   $alt     FALSE calls spinContent(), anything else spinContentAlt().
 * @return array|false array($title, $content), or false when the spinner throws
 *                     or the separator is missing from the spun text.
 */
function crawlomatic_spin_text($title, $content, $alt = false)
{
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
$titleSeparator = '[19459000]';
$text = $title . ' ' . $titleSeparator . ' ' . $content;
$text = html_entity_decode($text);
// Collect every distinct tag-like <...> fragment, plus the double quote character.
preg_match_all("/<[^<>]+>/is", $text, $matches, PREG_PATTERN_ORDER);
$htmlfounds = array_filter(array_unique($matches[0]));
$htmlfounds[] = '"';
$imgFoundsSeparated = array();
// For <img> tags with a non-empty alt text, protect the parts before and after
// the alt value separately so the alt text itself stays in the spinnable text.
foreach ($htmlfounds as $key => $currentFound) {
if (stristr($currentFound, '<img') && stristr($currentFound, 'alt')) {
$altSeparator = '';
$colonSeparator = '';
// Detect which spacing/quote variant of the alt attribute is used.
if (stristr($currentFound, 'alt="')) {
$altSeparator = 'alt="';
$colonSeparator = '"';
} elseif (stristr($currentFound, 'alt = "')) {
$altSeparator = 'alt = "';
$colonSeparator = '"';
} elseif (stristr($currentFound, 'alt ="')) {
$altSeparator = 'alt ="';
$colonSeparator = '"';
} elseif (stristr($currentFound, 'alt= "')) {
$altSeparator = 'alt= "';
$colonSeparator = '"';
} elseif (stristr($currentFound, 'alt=\'')) {
$altSeparator = 'alt=\'';
$colonSeparator = '\'';
} elseif (stristr($currentFound, 'alt = \'')) {
$altSeparator = 'alt = \'';
$colonSeparator = '\'';
} elseif (stristr($currentFound, 'alt= \'')) {
$altSeparator = 'alt= \'';
$colonSeparator = '\'';
} elseif (stristr($currentFound, 'alt =\'')) {
$altSeparator = 'alt =\'';
$colonSeparator = '\'';
}
if (trim($altSeparator) != '') {
// NOTE(review): stristr() above is case-insensitive but explode() is not, so an
// upper-case attribute (e.g. ALT=") leaves index 1 undefined here.
$currentFoundParts = explode($altSeparator, $currentFound);
$preAlt = $currentFoundParts[1];
$preAltParts = explode($colonSeparator, $preAlt);
$altText = $preAltParts[0];
if (trim($altText) != '') {
unset($preAltParts[0]);
$imgFoundsSeparated[] = $currentFoundParts[0] . $altSeparator;
$imgFoundsSeparated[] = $colonSeparator . implode('', $preAltParts);
// Drop the whole-tag entry; its two halves are protected instead.
$htmlfounds[$key] = '';
}
}
}
}
if (count($imgFoundsSeparated) != 0) {
$htmlfounds = array_merge($htmlfounds, $imgFoundsSeparated);
}
// HTML comments and bracketed [..] fragments are protected as well.
preg_match_all("/<\!--.*?-->/is", $text, $matches2, PREG_PATTERN_ORDER);
$newhtmlfounds = $matches2[0];
preg_match_all("/\[.*?\]/is", $text, $matches3, PREG_PATTERN_ORDER);
$shortcodesfounds = $matches3[0];
$htmlfounds = array_merge($htmlfounds, $newhtmlfounds, $shortcodesfounds);
// NOTE(review): $in is assigned but not used anywhere in this function.
$in = 0;
$cleanHtmlFounds = array();
// Discard blank entries and the title separator, which must survive as-is.
foreach ($htmlfounds as $htmlfound) {
if ($htmlfound == '[19459000]') {
} elseif (trim($htmlfound) == '') {
} else {
$cleanHtmlFounds[] = $htmlfound;
}
}
$htmlfounds = $cleanHtmlFounds;
// Replace each protected fragment with a numbered placeholder, in list order.
$start = 19459001;
foreach ($htmlfounds as $htmlfound) {
$text = str_replace($htmlfound, '[' . $start . ']', $text);
$start++;
}
try {
require_once(dirname(__FILE__) . "/res/crawlomatic-text-spinner.php");
$phpTextSpinner = new PhpTextSpinner();
if ($alt === FALSE) {
$spinContent = $phpTextSpinner->spinContent($text);
} else {
$spinContent = $phpTextSpinner->spinContentAlt($text);
}
$translated = $phpTextSpinner->runTextSpinner($spinContent);
}
catch (Exception $e) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Exception thrown in spinText ' . $e);
}
return false;
}
// Remove spaces, dots and commas from any bracketed token containing '19',
// presumably to repair placeholders the spinner altered - confirm.
preg_match_all('{\[.*?\]}', $translated, $brackets);
$brackets = $brackets[0];
$brackets = array_unique($brackets);
foreach ($brackets as $bracket) {
if (stristr($bracket, '19')) {
$corrrect_bracket = str_replace(' ', '', $bracket);
$corrrect_bracket = str_replace('.', '', $corrrect_bracket);
$corrrect_bracket = str_replace(',', '', $corrrect_bracket);
$translated = str_replace($bracket, $corrrect_bracket, $translated);
}
}
if (stristr($translated, $titleSeparator)) {
// Put the protected fragments back, using the same numbering as above.
$start = 19459001;
foreach ($htmlfounds as $htmlfound) {
$translated = str_replace('[' . $start . ']', $htmlfound, $translated);
$start++;
}
$contents = explode($titleSeparator, $translated);
$title = $contents[0];
$content = $contents[1];
} else {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to parse spinned content, separator not found');
}
return false;
}
return array(
$title,
$content
);
}
/**
 * Removes every element carrying the given CSS class from an HTML string.
 *
 * The class is matched as a whole word inside the element's class attribute.
 * When nothing is removed the input is returned untouched; otherwise the
 * result is the document as serialized by DOMDocument::saveHTML().
 *
 * @param string $html      HTML to process.
 * @param string $className Class name to remove.
 * @return string
 */
function crawlomatic_removeTagByClass(string $html, string $className)
{
    if ($html == '') {
        return '';
    }
    $document = new \DOMDocument();
    // Keep libxml parse warnings out of the output while loading.
    $previousErrorMode = libxml_use_internal_errors(true);
    $document->loadHTML('<?xml encoding="utf-8" ?>' . $html);
    libxml_use_internal_errors($previousErrorMode);

    $xpath = new \DOMXPath($document);
    $matches = $xpath->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' {$className} ')]");
    $removedCount = 0;
    foreach ($matches as $element) {
        $parent = $element->parentNode;
        if ($parent === null) {
            continue;
        }
        $parent->removeChild($element);
        $removedCount++;
    }
    return $removedCount > 0 ? $document->saveHTML() : $html;
}
/**
 * Removes every element whose id attribute equals the given value.
 *
 * When nothing is removed the input is returned untouched; otherwise the
 * result is the document as serialized by DOMDocument::saveHTML().
 *
 * @param string $html      HTML to process.
 * @param string $className Element id to remove (trimmed before use).
 * @return string
 */
function crawlomatic_removeTagByID(string $html, string $className)
{
    if ($html == '') {
        return '';
    }
    $document = new \DOMDocument();
    // Keep libxml parse warnings out of the output while loading.
    $previousErrorMode = libxml_use_internal_errors(true);
    $document->loadHTML('<?xml encoding="utf-8" ?>' . $html);
    libxml_use_internal_errors($previousErrorMode);

    $xpath = new \DOMXPath($document);
    $matches = $xpath->query('//*[@id="' . trim($className) . '"]');
    $removedCount = 0;
    foreach ($matches as $element) {
        $parent = $element->parentNode;
        if ($parent === null) {
            continue;
        }
        $parent->removeChild($element);
        $removedCount++;
    }
    return $removedCount > 0 ? $document->saveHTML() : $html;
}
/**
 * Removes every node selected by an XPath expression from an HTML string.
 *
 * When nothing is removed the input is returned untouched; otherwise the
 * result is the document as serialized by DOMDocument::saveHTML().
 *
 * Change from the previous version: DOMXPath::query() returns false for an
 * invalid expression; that result is now handled explicitly instead of being
 * passed to foreach.
 *
 * @param string $html      HTML to process.
 * @param string $className XPath expression selecting the nodes to remove (trimmed before use).
 * @return string
 */
function crawlomatic_removeTagByXPath(string $html, string $className)
{
    if ($html == '') {
        return '';
    }
    $dom = new \DOMDocument();
    // Keep libxml parse warnings out of the output while loading.
    $internalErrors = libxml_use_internal_errors(true);
    $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html);
    libxml_use_internal_errors($internalErrors);

    $finder = new \DOMXPath($dom);
    $nodes = $finder->query(trim($className));
    if ($nodes === false) {
        // Invalid expression: nothing can be removed.
        return $html;
    }
    $modified = false;
    foreach ($nodes as $node) {
        if ($node->parentNode !== null) {
            $node->parentNode->removeChild($node);
            $modified = true;
        }
    }
    return $modified ? $dom->saveHTML() : $html;
}
/**
 * Replace every node matched by an XPath expression with a plain text node that holds
 * the node's nodeValue, i.e. keep the text but drop the markup of the matched nodes.
 *
 * @param string $html      HTML markup to filter.
 * @param string $className The XPath expression (trimmed before use; the parameter keeps
 *                          the name shared by the sibling removal helpers).
 * @return string '' for empty input; the untouched input when nothing was replaced or
 *                the expression could not be evaluated; otherwise the whole parsed
 *                document as serialized by DOMDocument::saveHTML().
 */
function crawlomatic_removeHTMLByXPath(string $html, string $className)
{
    if($html == '')
    {
        return '';
    }
    $dom = new \DOMDocument();
    // Buffer libxml parse errors while loading, then restore the previous setting.
    $internalErrors = libxml_use_internal_errors(true);
    // The prepended declaration announces UTF-8 to the parser.
    $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html);
    libxml_use_internal_errors($internalErrors);
    $finder = new \DOMXPath($dom);
    $nodes = $finder->query(trim($className));
    // A malformed caller-supplied expression makes query() return false.
    if($nodes === false)
    {
        return $html;
    }
    $modified = false;
    foreach ($nodes as $node) {
        // Same guard as the sibling helpers: a match without a parent (for example the
        // document node selected by "/") has nothing to be replaced in.
        if($node->parentNode !== null)
        {
            $modified = true;
            $node->parentNode->replaceChild($dom->createTextNode($node->nodeValue), $node);
        }
    }
    if($modified == false)
    {
        return $html;
    }
    return $dom->saveHTML();
}
/**
 * Remove every element with the given tag name.
 *
 * @param string $html      HTML markup to filter.
 * @param string $className Tag name; it is trimmed and appended to "//" to form the
 *                          XPath query (the parameter keeps the name shared by the
 *                          sibling removal helpers).
 * @return string '' for empty input; the untouched input when nothing was removed or
 *                the XPath query could not be evaluated; otherwise the whole parsed
 *                document as serialized by DOMDocument::saveHTML().
 */
function crawlomatic_removeTagByTag(string $html, string $className)
{
    if($html == '')
    {
        return '';
    }
    $dom = new \DOMDocument();
    // Buffer libxml parse errors while loading, then restore the previous setting.
    $internalErrors = libxml_use_internal_errors(true);
    // The prepended declaration announces UTF-8 to the parser.
    $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html);
    libxml_use_internal_errors($internalErrors);
    $finder = new \DOMXPath($dom);
    $nodes = $finder->query("//" . trim($className));
    // query() returns false when the resulting expression is malformed (for example an
    // empty or syntactically broken tag name); do not iterate over false.
    if($nodes === false)
    {
        return $html;
    }
    $modified = false;
    foreach ($nodes as $node) {
        if($node->parentNode !== null)
        {
            $modified = true;
            $node->parentNode->removeChild($node);
        }
    }
    if($modified == false)
    {
        return $html;
    }
    return $dom->saveHTML();
}
/**
 * Rewrite a title/content pair through "The Best Spinner" web API.
 *
 * Title and content are joined with the '[19459000]' marker, an authenticated session is
 * opened, the text is submitted (in 30000-byte slices when it has more than 4000 words),
 * and the returned spintax is resolved with Crawlomatic_Spintax after splitting on the marker.
 *
 * @param string $title     Post title.
 * @param string $content   Post content.
 * @param string $user_name Optional account name; only used when $pass is given as well,
 *                          otherwise 'best_user'/'best_password' from the plugin settings apply.
 * @param string $pass      Optional account password.
 * @return array|false Parts of the rewritten text split on the marker (index 0 = title,
 *                     index 1 = content), or FALSE on any failure.
 */
function crawlomatic_best_spin_text($title, $content, $user_name = '', $pass = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $data = array();
    if($user_name != '' && $pass != '')
    {
        $data['username'] = $user_name;
        $data['password'] = $pass;
    }
    else
    {
        if (!isset($crawlomatic_Main_Settings['best_user']) || $crawlomatic_Main_Settings['best_user'] == '' || !isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '') {
            crawlomatic_log_to_file('Please insert a valid "The Best Spinner" user name and password.');
            return FALSE;
        }
        $data['username'] = $crawlomatic_Main_Settings['best_user'];
        $data['password'] = $crawlomatic_Main_Settings['best_password'];
    }
    $titleSeparator = '[19459000]';
    $newhtml = $title . ' ' . $titleSeparator . ' ' . $content;
    // NOTE(review): the endpoint is plain HTTP and peer verification is switched off below,
    // so the response must be treated as untrusted input.
    $url = 'http://thebestspinner.com/api.php';
    $data['action'] = 'authenticate';
    $data['format'] = 'php';
    $ch = curl_init();
    if ($ch === FALSE) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('"The Best Spinner" failed to init curl.');
        }
        return FALSE;
    }
    $timeout = 90;
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    $fdata = "";
    foreach ($data as $key => $val) {
        $fdata .= "$key=" . urlencode($val) . "&";
    }
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    $html = crawlomatic_curl_exec_utf8($ch);
    curl_close($ch);
    if ($html === FALSE || empty($html)) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('"The Best Spinner" failed to exec curl.');
        }
        return FALSE;
    }
    // The API answers in PHP serialize() format. Forbid object instantiation so that a
    // tampered response cannot trigger object injection, and require an array result.
    $output = unserialize($html, array('allowed_classes' => false));
    if (!is_array($output) || !isset($output['success']) || $output['success'] != 'true') {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('"The Best Spinner" authentification failed. ' . print_r($output, true));
        }
        return FALSE;
    }
    $data = array();
    $data['session'] = isset($output['session']) ? $output['session'] : '';
    $data['format'] = 'php';
    if (isset($crawlomatic_Main_Settings['protected_terms']) && $crawlomatic_Main_Settings['protected_terms'] != '')
    {
        $protected_terms = $crawlomatic_Main_Settings['protected_terms'];
    }
    else
    {
        $protected_terms = '';
    }
    $data['protectedterms'] = $protected_terms;
    $data['action'] = 'replaceEveryonesFavorites';
    $data['maxsyns'] = '100';
    $data['quality'] = '1';
    $ch = curl_init();
    if ($ch === FALSE) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('Failed to init curl');
        }
        return FALSE;
    }
    $newhtml = html_entity_decode($newhtml);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_REFERER, $url);
    // Long texts are submitted in 30000-byte slices; shorter ones in a single request.
    $chunked = str_word_count($newhtml) > 4000;
    $pieces = array();
    if($chunked)
    {
        while($newhtml != '')
        {
            $piece = substr($newhtml, 0, 30000);
            $piece = rtrim($piece, '(*');
            $piece = ltrim($piece, ')*');
            $pieces[] = $piece;
            $newhtml = substr($newhtml, 30000);
        }
    }
    else
    {
        $pieces[] = $newhtml;
    }
    $spinned = '';
    foreach ($pieces as $piece)
    {
        $data['text'] = $piece;
        $fdata = "";
        foreach ($data as $key => $val) {
            $fdata .= "$key=" . urlencode($val) . "&";
        }
        curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
        $raw = curl_exec($ch);
        if ($raw === FALSE) {
            if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                crawlomatic_log_to_file('"The Best Spinner" failed to exec curl after auth.');
            }
            // Release the handle on every exit path.
            curl_close($ch);
            return FALSE;
        }
        $output = unserialize($raw, array('allowed_classes' => false));
        if (!is_array($output) || !isset($output['success']) || $output['success'] != 'true' || !isset($output['output'])) {
            if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
                if($chunked)
                {
                    crawlomatic_log_to_file('"The Best Spinner" failed to spin article.');
                }
                else
                {
                    crawlomatic_log_to_file('"The Best Spinner" failed to spin article: ' . print_r($output, true));
                }
            }
            curl_close($ch);
            return FALSE;
        }
        // Slices are re-joined with a leading space; a single response is used verbatim.
        $spinned .= $chunked ? ' ' . $output['output'] : $output['output'];
    }
    curl_close($ch);
    $result = explode($titleSeparator, $spinned);
    if (count($result) < 2) {
        if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
            crawlomatic_log_to_file('"The Best Spinner" failed to spin article - titleseparator not found.' . print_r($output, true));
        }
        return FALSE;
    }
    // Resolve the {a|b} spintax of both parts into concrete text.
    $spintax = new Crawlomatic_Spintax();
    $result[0] = $spintax->process($result[0]);
    $result[1] = $spintax->process($result[1]);
    return $result;
}
/**
 * Minimal spintax resolver: every {option1|option2|...} group, nested groups included,
 * is replaced by one randomly chosen option.
 */
class Crawlomatic_Spintax
{
    /**
     * Resolve all spintax groups in a text.
     *
     * The subject is matched as-is. Running it through preg_quote()/stripslashes()
     * (as was done before) left stray backslashes around nested groups, e.g. "{{b}}"
     * came out as "\b\".
     *
     * @param string $text Text that may contain spintax groups.
     * @return string Text with every group resolved; the input is returned unchanged
     *                when the regex engine fails (for instance on a backtrack limit),
     *                so the content is never blanked.
     */
    public function process($text)
    {
        $text = (string) $text;
        $resolved = preg_replace_callback(
            '/\{(((?>[^\{\}]+)|(?R))*)\}/x',
            array($this, 'replace'),
            $text
        );
        if ($resolved === null) {
            return $text;
        }
        return $resolved;
    }
    /**
     * preg_replace_callback() handler: resolve nested groups inside the matched group,
     * then pick one of its '|'-separated options at random.
     *
     * @param array $text Match array; index 1 holds the group content without braces.
     * @return string The chosen option.
     */
    public function replace($text)
    {
        $text = $this->process($text[1]);
        $parts = explode('|', $text);
        return $parts[array_rand($parts)];
    }
}
/**
 * Rewrite a title/content pair through the WordAi rewrite API.
 *
 * Title and content are joined with the '[19459000]' marker and sent in one request;
 * the first returned rewrite is split on that marker again.
 *
 * @param string $title     Post title.
 * @param string $content   Post content.
 * @param string $user_name Optional account e-mail; only used when $pass is given as well,
 *                          otherwise 'best_user'/'best_password' from the plugin settings apply.
 * @param string $pass      Optional API key.
 * @return array|false array(0 => title, 1 => content) - the original title is used when the
 *                     marker did not survive the rewrite - or FALSE on failure.
 */
function crawlomatic_wordai_spin_text($title, $content, $user_name = '', $pass = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if($user_name != '' && $pass != '')
    {
        $email = $user_name;
    }
    else
    {
        if (!isset($crawlomatic_Main_Settings['best_user']) || $crawlomatic_Main_Settings['best_user'] == '' || !isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '') {
            crawlomatic_log_to_file('Please insert a valid "Wordai" user name and password.');
            return FALSE;
        }
        $email = $crawlomatic_Main_Settings['best_user'];
        $pass = $crawlomatic_Main_Settings['best_password'];
    }
    $titleSeparator = '[19459000]';
    $html = $title . ' ' . $titleSeparator . ' ' . $content;
    $ch = curl_init('https://wai.wordai.com/api/rewrite');
    if($ch === false)
    {
        crawlomatic_log_to_file('Failed to init curl in wordai spinning.');
        return FALSE;
    }
    $timeout = 120;
    if (isset($crawlomatic_Main_Settings['wordai_uniqueness']) && $crawlomatic_Main_Settings['wordai_uniqueness'] != '')
    {
        $wordai_uniqueness = trim($crawlomatic_Main_Settings['wordai_uniqueness']);
    }
    else
    {
        $wordai_uniqueness = '2';
    }
    // Only the levels '1', '2' and '3' are accepted here; anything else falls back to '2'.
    if($wordai_uniqueness != '1' && $wordai_uniqueness != '2' && $wordai_uniqueness != '3')
    {
        $wordai_uniqueness = '2';
    }
    // Every value is URL-encoded: an e-mail containing '+' or a key containing '&' or '='
    // would otherwise corrupt the form body.
    $post_fields = 'input=' . urlencode($html) . '&uniqueness=' . $wordai_uniqueness . '&rewrite_num=1&return_rewrites=true&email=' . urlencode($email) . '&key=' . urlencode($pass);
    curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt ($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt ($ch, CURLOPT_POSTFIELDS, $post_fields);
    curl_setopt ($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, 0);
    $result = curl_exec($ch);
    if ($result === FALSE) {
        // NOTE(review): this log line includes the account e-mail and API key - consider redacting.
        crawlomatic_log_to_file('"Wordai" failed to exec curl after auth. URL: https://wai.wordai.com/api/rewrite , POST: ' . $post_fields . ' -- ERROR: ' . curl_error($ch));
        curl_close ($ch);
        return FALSE;
    }
    curl_close ($ch);
    $result = json_decode($result);
    // An empty "rewrites" list is as unusable as a missing one; without this check the
    // function would hand back an empty article.
    if(!isset($result->rewrites) || !is_array($result->rewrites) || !isset($result->rewrites[0]))
    {
        crawlomatic_log_to_file('"Wordai" unrecognized response: ' . print_r($result, true));
        return FALSE;
    }
    $result = explode($titleSeparator, $result->rewrites[0]);
    if (count($result) < 2) {
        // Marker lost during rewriting: treat everything as content, keep the original title.
        $result[1] = $result[0];
        $result[0] = $title;
    }
    return $result;
}
/**
 * Rewrite a title/content pair through the Spin Rewriter API ("unique_variation" action).
 *
 * Credentials come from the 'best_user' / 'best_password' plugin settings. Title and
 * content are joined with the '(19459000)' marker, whitespace runs are collapsed and curly
 * braces are turned into square brackets before submission. Texts with 2500 or more words
 * are sent in slices of 2500 space-separated words.
 *
 * @param string $title      Post title.
 * @param string $content    Post content.
 * @param string $confidence Value sent as the API's 'confidence_level' field.
 * @return array|false Rewritten text split on the marker (index 0 = title part,
 *                     index 1 = content part), or FALSE on failure.
 */
function crawlomatic_spinrewriter_spin_text($title, $content, $confidence = 'high')
{
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
if (!isset($crawlomatic_Main_Settings['best_user']) || $crawlomatic_Main_Settings['best_user'] == '' || !isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '') {
crawlomatic_log_to_file('Please insert a valid "SpinRewriter" user name and password.');
return FALSE;
}
$titleSeparator = '(19459000)';
// NOTE(review): $quality is assigned but never read in this function.
$quality = '50';
$html = $title . ' ' . $titleSeparator . ' ' . $content;
// Collapse whitespace runs; keep the original text if the regex engine fails (null).
$html1 = preg_replace('/\s+/', ' ', $html);
if($html1 !== null)
{
$html = $html1;
}
// Presumably so that literal braces are not interpreted as spintax by the service - TODO confirm.
$html = str_replace('{', '[', $html);
$html = str_replace('}', ']', $html);
$data = array();
$data['email_address'] = $crawlomatic_Main_Settings['best_user'];
$data['api_key'] = $crawlomatic_Main_Settings['best_password'];
$data['action'] = "unique_variation";
$data['auto_protected_terms'] = "true";
$data['confidence_level'] = $confidence;
$data['auto_sentences'] = "true";
$data['auto_paragraphs'] = "false";
$data['auto_new_paragraphs'] = "false";
$data['auto_sentence_trees'] = "false";
$data['use_only_synonyms'] = "true";
$data['reorder_paragraphs'] = "false";
$data['nested_spintax'] = "false";
if (isset($crawlomatic_Main_Settings['protected_terms']) && $crawlomatic_Main_Settings['protected_terms'] != '')
{
$protected_terms = $crawlomatic_Main_Settings['protected_terms'];
// The comma separated setting is sent with one term per line.
$data['protected_terms'] = str_replace(',', '
', $protected_terms);
}
if(str_word_count($html) >= 2500)
{
// Long text: submit slices of 2500 space-separated words and concatenate the answers.
$result = '';
while($html != '' && $html != ' ')
{
$words = explode(" ", $html);
$first30k = join(" ", array_slice($words, 0, 2500));
$html = join(" ", array_slice($words, 2500));
$data['text'] = $first30k;
$api_response_raw = crawlomatic_spinrewriter_api_post($data);
if ($api_response_raw === FALSE) {
crawlomatic_log_to_file('"SpinRewriter" failed to exec curl after auth.');
return FALSE;
}
$api_response = json_decode($api_response_raw);
if(!isset($api_response->response) || !isset($api_response->status) || $api_response->status != 'OK')
{
if(isset($api_response->status) && $api_response->status == 'ERROR')
{
// Only the rate-limit error is retried (once); unlike the single-request branch below
// there is no explicit sleep here before resubmitting.
if(isset($api_response->response) && $api_response->response == 'You can only submit entirely new text for analysis once every 7 seconds.')
{
$api_response_raw = crawlomatic_spinrewriter_api_post($data);
if ($api_response_raw === FALSE) {
crawlomatic_log_to_file('"SpinRewriter" failed to exec curl after auth (after resubmit).');
return FALSE;
}
$api_response = json_decode($api_response_raw);
if(!isset($api_response->response) || !isset($api_response->status) || $api_response->status != 'OK')
{
crawlomatic_log_to_file('"SpinRewriter" failed to wait and resubmit spinning: ' . print_r($api_response, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
else
{
crawlomatic_log_to_file('"SpinRewriter" error response1: ' . print_r($api_response_raw, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
else
{
crawlomatic_log_to_file('"SpinRewriter" error response2: ' . print_r($api_response_raw, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
$spinned = $api_response->response;
$result .= ' ' . $spinned;
// Pause between slices while there is still text left to submit.
if($html != '' && $html != ' ')
{
sleep(7);
}
}
}
else
{
// Short text: one request, with a single retry after a 7 second pause.
$data['text'] = $html;
$api_response_raw = crawlomatic_spinrewriter_api_post($data);
if ($api_response_raw === FALSE) {
crawlomatic_log_to_file('"SpinRewriter" failed to exec curl after auth.');
return FALSE;
}
$api_response = json_decode($api_response_raw);
if(!isset($api_response->response) || !isset($api_response->status) || $api_response->status != 'OK')
{
if(isset($api_response->status) && $api_response->status == 'ERROR')
{
if(isset($api_response->response) && $api_response->response == 'You can only submit entirely new text for analysis once every 7 seconds.')
{
sleep(7);
$api_response_raw = crawlomatic_spinrewriter_api_post($data);
if ($api_response_raw === FALSE) {
crawlomatic_log_to_file('"SpinRewriter" failed to exec curl after auth (after resubmit).');
return FALSE;
}
$api_response = json_decode($api_response_raw);
if(!isset($api_response->response) || !isset($api_response->status) || $api_response->status != 'OK')
{
crawlomatic_log_to_file('"SpinRewriter" failed to wait and resubmit spinning: ' . print_r($api_response, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
else
{
crawlomatic_log_to_file('"SpinRewriter" error response3: ' . print_r($api_response_raw, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
else
{
// Response without an ERROR status (e.g. undecodable JSON): wait and try once more.
sleep(7);
$api_response_raw = crawlomatic_spinrewriter_api_post($data);
if ($api_response_raw === FALSE) {
crawlomatic_log_to_file('"SpinRewriter" failed to exec curl after auth (after resubmit).');
return FALSE;
}
$api_response = json_decode($api_response_raw);
if(!isset($api_response->response) || !isset($api_response->status) || $api_response->status != 'OK')
{
crawlomatic_log_to_file('"SpinRewriter" error response4: ' . print_r($api_response_raw, true) . ' params: ' . print_r($data, true));
return FALSE;
}
}
}
$result = $api_response->response;
}
// Split the rewritten text back into title and content on the marker.
$result = explode($titleSeparator, $result);
if (count($result) < 2) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('"SpinRewriter" failed to spin article - titleseparator not found: ' . $api_response->response);
}
return FALSE;
}
return $result;
}
/**
 * Rewrite post content through the AISEO rewrite API (the title is passed through untouched).
 *
 * Content with 250 or more words is submitted in slices of 250 space-separated words;
 * each rewritten slice is passed through nl2br() and the slices are joined with spaces.
 *
 * @param string $title     Post title, returned unchanged.
 * @param string $content   Post content to rewrite.
 * @param string $user_name Optional API key; when empty the 'best_password' setting is
 *                          used, falling back to 'best_user'.
 * @return array|false array($title, $rewritten_content) or FALSE on failure.
 */
function crawlomatic_aiseo_spin_text($title, $content, $user_name = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if($user_name != '')
    {
        $appi = $user_name;
    }
    else
    {
        if ((!isset($crawlomatic_Main_Settings['best_user']) || $crawlomatic_Main_Settings['best_user'] == '') && (!isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '')) {
            crawlomatic_log_to_file('Please insert a valid "AISEO" user name and password.');
            return FALSE;
        }
        // The key may be stored in either settings field; the password field wins.
        if(!isset($crawlomatic_Main_Settings['best_password']) || $crawlomatic_Main_Settings['best_password'] == '')
        {
            $appi = $crawlomatic_Main_Settings['best_user'];
        }
        else
        {
            $appi = $crawlomatic_Main_Settings['best_password'];
        }
    }
    $curl = curl_init();
    if($curl === false)
    {
        return false;
    }
    // Configured once; only the request body changes between requests.
    curl_setopt_array($curl, array(
        CURLOPT_URL => 'https://api.aiseo.ai/v2/rewrite',
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING => '',
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => 'POST',
        CURLOPT_HTTPHEADER => array(
            'Authorization: Bearer ' . $appi,
            'Content-Type: application/json'
        ),
    ));
    $aiseo_word_count = 250;
    $html = $content;
    if(str_word_count($html) >= $aiseo_word_count)
    {
        $result = '';
        while($html != '' && $html != ' ')
        {
            $words = explode(" ", $html);
            $first30k = join(" ", array_slice($words, 0, $aiseo_word_count));
            $html = join(" ", array_slice($words, $aiseo_word_count));
            $data = array(
                "text" => $first30k,
                "audience" => "general",
                "formality" => "neutral",
                "intent" => "inform"
            );
            curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($data));
            $api_response = curl_exec($curl);
            if ($api_response === FALSE || empty($api_response))
            {
                curl_close($curl);
                crawlomatic_log_to_file('"AISEO" failed to exec multi curl after auth.');
                return FALSE;
            }
            $api_responsex = json_decode($api_response);
            if(!isset($api_responsex->rewritten))
            {
                // Release the handle on this exit path as well.
                curl_close($curl);
                crawlomatic_log_to_file('"AISEO" multi error response: ' . print_r($api_response, true) . ' params: ' . print_r($data, true));
                return FALSE;
            }
            $result .= ' ' . nl2br($api_responsex->rewritten);
        }
        curl_close($curl);
        if($result !== '')
        {
            return array($title, $result);
        }
        else
        {
            crawlomatic_log_to_file('"AISEO" multi failed to rewrite content! ' . print_r($content, true));
            return FALSE;
        }
    }
    else
    {
        $data = array(
            "text" => $content,
            "audience" => "general",
            "formality" => "neutral",
            "intent" => "inform"
        );
        curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($data));
        $api_response = curl_exec($curl);
        if ($api_response === FALSE || empty($api_response))
        {
            curl_close($curl);
            crawlomatic_log_to_file('"AISEO" failed to exec curl after auth.');
            return FALSE;
        }
        curl_close($curl);
        $api_responsex = json_decode($api_response);
        if(!isset($api_responsex->rewritten))
        {
            crawlomatic_log_to_file('"AISEO" error response: ' . print_r($api_response, true) . ' params: ' . print_r($data, true));
            return FALSE;
        }
        $api_response = nl2br($api_responsex->rewritten);
        return array($title, $api_response);
    }
}
/**
 * Rewrite a title/content pair through the TurkceSpin API.
 *
 * Title and content are joined with the '[19459000]' marker, posted as 'article' together
 * with the API token, and the URL-decoded answer is split on the marker again.
 *
 * @param string $title     Post title.
 * @param string $content   Post content.
 * @param string $user_name Optional API token; when empty the 'best_password' setting is
 *                          used, falling back to 'best_user'.
 * @return array|false Rewritten text split on the marker, or FALSE on failure.
 */
function crawlomatic_turkcespin_spin_text($title, $content, $user_name = '')
{
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if ($user_name == '') {
        $has_user = isset($crawlomatic_Main_Settings['best_user']) && !($crawlomatic_Main_Settings['best_user'] == '');
        $has_pass = isset($crawlomatic_Main_Settings['best_password']) && !($crawlomatic_Main_Settings['best_password'] == '');
        if (!$has_user && !$has_pass) {
            crawlomatic_log_to_file('Please insert a valid "TurkceSpin" user name and password.');
            return FALSE;
        }
        // The password field takes precedence over the user field as token source.
        $appi = $has_pass ? $crawlomatic_Main_Settings['best_password'] : $crawlomatic_Main_Settings['best_user'];
    } else {
        $appi = $user_name;
    }

    $titleSeparator = '[19459000]';
    $html = $title . ' ' . $titleSeparator . ' ' . $content;

    $ch = curl_init("https://turkcespin.com/api/spin");
    if ($ch === false) {
        return false;
    }
    curl_setopt_array($ch, array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => 1,
        CURLOPT_POSTFIELDS => http_build_query(array(
            'token' => $appi,
            'article' => $html
        )),
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 60,
    ));
    $api_response = curl_exec($ch);
    curl_close($ch);
    if ($api_response === FALSE) {
        crawlomatic_log_to_file('"TurkceSpin" failed to exec curl after auth.');
        return FALSE;
    }

    $api_responsex = json_decode($api_response);
    $accepted = isset($api_responsex->article)
        && isset($api_responsex->status)
        && ($api_responsex->status == 'ok' || $api_responsex->status == 'OK');
    if (!$accepted) {
        // NOTE(review): the API token is written to the log here.
        crawlomatic_log_to_file('"TurkceSpin" error response: ' . print_r($api_response, true) . ' params: ' . print_r($appi, true) . ' --- ' . print_r($html, true));
        return FALSE;
    }

    $result = explode($titleSeparator, urldecode($api_responsex->article));
    if (count($result) >= 2) {
        return $result;
    }
    if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
        crawlomatic_log_to_file('"TurkceSpin" failed to spin article - titleseparator not found.');
    }
    return FALSE;
}
/**
 * Send one form-encoded POST request to the Spin Rewriter API.
 *
 * Requests are spaced out through the 'crspinrewriter_spin_time' option: when the last
 * recorded request is less than 10 seconds old, the function sleeps for the remainder.
 *
 * @param array $data Request fields (name => value); values are URL-encoded here.
 * @return string|false Trimmed response body, or false when cURL could not be
 *                      initialized or the transfer failed.
 */
function crawlomatic_spinrewriter_api_post($data){
    $data_raw = "";
    // Drop the cached copy first, presumably so that a timestamp written by another
    // request is picked up by get_option() - TODO confirm.
    $GLOBALS['wp_object_cache']->delete('crspinrewriter_spin_time', 'options');
    $spin_time = get_option('crspinrewriter_spin_time', false);
    if($spin_time !== false && is_numeric($spin_time))
    {
        $c_time = time();
        $spassed = $c_time - $spin_time;
        if($spassed < 10 && $spassed >= 0)
        {
            sleep(10 - $spassed);
        }
    }
    update_option('crspinrewriter_spin_time', time());
    foreach ($data as $key => $value){
        $data_raw = $data_raw . $key . "=" . urlencode($value) . "&";
    }
    $ch = curl_init();
    if($ch === false)
    {
        return false;
    }
    // The request body carries the account e-mail and API key, so it is sent over
    // HTTPS rather than cleartext HTTP.
    curl_setopt($ch, CURLOPT_URL, "https://www.spinrewriter.com/action/api");
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data_raw);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // NOTE(review): certificate verification is disabled; consider enabling it.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT,90);
    $response = curl_exec($ch);
    if(is_string($response))
    {
        $response = trim($response);
    }
    if($response === false)
    {
        crawlomatic_log_to_file('SpinRewriter failed: ' . curl_error($ch));
    }
    curl_close($ch);
    return $response;
}
/**
 * Extract a page title from raw HTML.
 *
 * Sources are tried in this order and the first non-blank value wins: og:title meta,
 * twitter:title meta, itemprop="title" meta, itemprop="headline" meta, and finally the
 * <title> element.
 *
 * @param string $content HTML source of the page.
 * @return string The title exactly as found (not trimmed or entity-decoded), or '' when
 *                none was found.
 */
function crawlomatic_get_title($content)
{
    // Each probe: pattern locating the meta tag, and a substring the tag must contain
    // before its content attribute is read.
    $meta_probes = array(
        array('{<meta[^<]*?property=["\']og:title["\'][^<]*?>}i', 'content'),
        array('{<meta[^<]*?property=["\']twitter:title["\'][^<]*?>}i', 'content'),
        array('{<meta[^<]*?itemprop\s*=["\']title["\'][^<]*?>}i', 'content='),
        array('{<meta[^<]*?itemprop\s*=["\']headline["\'][^<]*?>}i', 'content='),
    );
    foreach ($meta_probes as $probe) {
        $mathc = array();
        preg_match($probe[0], $content, $mathc);
        if (!isset($mathc[0]) || stristr($mathc[0], $probe[1]) === false) {
            continue;
        }
        $matx = array();
        preg_match('{content\s*=["\'](.*?)["\']}s', $mathc[0], $matx);
        if (isset($matx[1]) && trim($matx[1]) != '') {
            return $matx[1];
        }
    }
    $mathc = array();
    preg_match('{<title(?:[^>]*?)>([^<]*?)<\/title>}i', $content, $mathc);
    // preg_match() stores the captured group as a string in $mathc[1]; indexing it with
    // [0] (as done before) returned only the first character of the title.
    if (isset($mathc[1]) && trim($mathc[1]) != '') {
        return $mathc[1];
    }
    return '';
}
/**
 * Extract the author name from the meta tags of raw HTML.
 *
 * Probed in order: name="author", name="dc.creator", property="article:author". The first
 * tag with a non-blank content attribute wins.
 *
 * @param string $content HTML source of the page.
 * @return string The author exactly as found in the content attribute, or '' if none.
 */
function crawlomatic_get_author($content)
{
    // Tag pattern => substring the matched tag has to contain before its content is read.
    $probes = array(
        '{<meta[^<]*?name=["\']author["\'][^<]*?>}s' => 'author',
        '{<meta[^<]*?name=["\']dc.creator["\'][^<]*?>}s' => 'content',
        '{<meta[^<]*?property=["\']article:author["\'][^<]*?>}s' => 'content',
    );
    foreach ($probes as $tag_pattern => $required) {
        $tag = array();
        preg_match($tag_pattern, $content, $tag);
        if (!isset($tag[0]) || !stristr($tag[0], $required)) {
            continue;
        }
        $value = array();
        preg_match('{content\s*=["\'](.*?)["\']}s', $tag[0], $value);
        if (!isset($value[1])) {
            continue;
        }
        if (trim($value[1]) != '') {
            return $value[1];
        }
    }
    return '';
}
/**
 * Mask everything that must not be rewritten or translated with numeric placeholders.
 *
 * Script/pre/code blocks, HTML tags, HTML comments, bracketed spans and the double quote
 * character are collected and each one is replaced in the text by "[N]", counting up from
 * 19459001. Runs of two or more consecutive placeholders are then folded into a single
 * placeholder counting up from 19659001. The collected data is handed back through the
 * by-reference parameters so that crawlomatic_restoreExcludes() can undo the masking.
 *
 * @param string $text                HTML text to mask.
 * @param array  $htmlfounds          (out) Masked fragments, in placeholder order.
 * @param array  $pre_tags_matches    (out) Every bracket token present after masking.
 * @param array  $pre_tags_matches_s  (out) The same tokens including surrounding whitespace.
 * @param array  $conseqMatchs        (out) preg_match_all() result for the folded placeholder runs.
 * @return string The masked text, with a line break before every '[' and after every ']'.
 */
function crawlomatic_replaceExcludes($text, &$htmlfounds, &$pre_tags_matches, &$pre_tags_matches_s, &$conseqMatchs)
{
// Whole script / pre / code blocks are masked as single fragments.
preg_match_all ( '{<script.*?script>}s', $text, $script_matchs );
$script_matchs = $script_matchs [0];
preg_match_all ( '{<pre.*?/pre>}s', $text, $pre_matchs );
$pre_matchs = $pre_matchs [0];
preg_match_all ( '{<code.*?/code>}s', $text, $code_matchs );
$code_matchs = $code_matchs [0];
// Every individual tag, de-duplicated.
preg_match_all ( "/<[^<>]+>/is", $text, $matches, PREG_PATTERN_ORDER );
$htmlfounds = array_filter ( array_unique ( $matches [0] ) );
$htmlfounds = array_merge ( $script_matchs, $pre_matchs, $code_matchs, $htmlfounds );
$htmlfounds [] = '"';
$imgFoundsSeparated = array ();
$new_imgFoundsSeparated = array ();
$altSeparator = '';
$colonSeparator = '';
// Image tags with a non-empty alt text are split around that text, so the alt text itself
// stays unmasked while the markup before and after it is masked.
foreach ( $htmlfounds as $key => $currentFound )
{
if (stristr ( $currentFound, '<img' ) && stristr ( $currentFound, 'alt' ) && ! stristr ( $currentFound, 'alt=""' ))
{
$altSeparator = '';
$colonSeparator = '';
// Detect which spelling of the alt attribute opener and which quote character is used.
if (stristr ( $currentFound, 'alt="' )) {
$altSeparator = 'alt="';
$colonSeparator = '"';
} elseif (stristr ( $currentFound, 'alt = "' )) {
$altSeparator = 'alt = "';
$colonSeparator = '"';
} elseif (stristr ( $currentFound, 'alt ="' )) {
$altSeparator = 'alt ="';
$colonSeparator = '"';
} elseif (stristr ( $currentFound, 'alt= "' )) {
$altSeparator = 'alt= "';
$colonSeparator = '"';
} elseif (stristr ( $currentFound, 'alt=\'' )) {
$altSeparator = 'alt=\'';
$colonSeparator = '\'';
} elseif (stristr ( $currentFound, 'alt = \'' )) {
$altSeparator = 'alt = \'';
$colonSeparator = '\'';
} elseif (stristr ( $currentFound, 'alt= \'' )) {
$altSeparator = 'alt= \'';
$colonSeparator = '\'';
} elseif (stristr ( $currentFound, 'alt =\'' )) {
$altSeparator = 'alt =\'';
$colonSeparator = '\'';
}
if (trim ( $altSeparator ) != '')
{
// NOTE(review): detection above is case-insensitive but explode() is case-sensitive, so
// index 1 may be missing for e.g. an upper-case ALT attribute - confirm.
$currentFoundParts = explode ( $altSeparator, $currentFound );
$preAlt = $currentFoundParts [1];
$preAltParts = explode ( $colonSeparator, $preAlt );
$altText = $preAltParts [0];
if (trim ( $altText ) != '')
{
unset ( $preAltParts [0] );
$past_alt_text = implode ( $colonSeparator, $preAltParts );
// Keep the markup up to and including the attribute opener, and the markup from the
// closing quote onwards; the whole-tag entry is blanked and filtered out further below.
$imgFoundsSeparated [] = $currentFoundParts [0] . $altSeparator;
$imgFoundsSeparated [] = $colonSeparator . $past_alt_text;
$htmlfounds [$key] = '';
}
}
}
}
// The title attribute is split the same way, using the separator style of the last
// alt attribute that was detected in the loop above.
$title_separator = str_replace ( 'alt', 'title', $altSeparator );
if($title_separator == '')
{
$title_separator = 'title';
}
if($colonSeparator != '')
{
foreach ( $imgFoundsSeparated as $img_part )
{
if (stristr ( $img_part, ' title' ))
{
$img_part_parts = explode ( $title_separator, $img_part );
$pre_title_part = $img_part_parts [0] . $title_separator;
$post_title_parts = explode ( $colonSeparator, $img_part_parts [1] );
$found_title = $post_title_parts [0];
unset ( $post_title_parts [0] );
$past_title_text = implode ( $colonSeparator, $post_title_parts );
$post_title_part = $colonSeparator . $past_title_text;
$new_imgFoundsSeparated [] = $pre_title_part;
$new_imgFoundsSeparated [] = $post_title_part;
} else {
$new_imgFoundsSeparated [] = $img_part;
}
}
}
if (count ( $new_imgFoundsSeparated ) != 0) {
$htmlfounds = array_merge ( $htmlfounds, $new_imgFoundsSeparated );
}
// HTML comments and bracketed spans are masked as well.
preg_match_all ( "/<\!--.*?-->/is", $text, $matches2, PREG_PATTERN_ORDER );
$newhtmlfounds = $matches2 [0];
preg_match_all ( "/\[.*?\]/is", $text, $matches3, PREG_PATTERN_ORDER );
$shortcodesfounds = $matches3 [0];
$htmlfounds = array_merge ( $htmlfounds, $newhtmlfounds, $shortcodesfounds );
// NOTE(review): $in is never read.
$in = 0;
// Drop blank entries and the '[19459000]' marker, which must stay in the text as it is.
$cleanHtmlFounds = array ();
foreach ( $htmlfounds as $htmlfound ) {
if ($htmlfound == '[19459000]') {
} elseif (trim ( $htmlfound ) == '') {
} else {
$cleanHtmlFounds [] = $htmlfound;
}
}
$htmlfounds = array_filter ( $cleanHtmlFounds );
// Replace each fragment by its numbered placeholder; numbering follows array order.
$start = 19459001;
foreach ( $htmlfounds as $htmlfound ) {
$text = str_replace ( $htmlfound, '[' . $start . ']', $text );
$start ++;
}
$text = str_replace ( '.{', '. {', $text );
// Fold runs of two or more adjacent placeholders into a single placeholder each.
preg_match_all ( '!(?:\[1945\d*\][\s]*){2,}!s', $text, $conseqMatchs );
$startConseq = 19659001;
foreach ( $conseqMatchs [0] as $conseqMatch ) {
$text = preg_replace ( '{' . preg_quote ( trim ( $conseqMatch ) ) . '}', '[' . $startConseq . ']', $text, 1 );
$startConseq ++;
}
// Remember the remaining tokens, without and with their surrounding whitespace.
preg_match_all ( '{\[.*?\]}', $text, $pre_tags_matches );
$pre_tags_matches = ($pre_tags_matches [0]);
preg_match_all ( '{\s*\[.*?\]\s*}u', $text, $pre_tags_matches_s );
$pre_tags_matches_s = ($pre_tags_matches_s [0]);
// Put every placeholder on a line of its own.
$text = str_replace ( '[', "
[", $text );
$text = str_replace ( ']', "]
", $text );
return $text;
}
/**
 * Undo crawlomatic_replaceExcludes(): repair placeholders that the rewriting or translation
 * step may have altered, then substitute the original fragments back into the text.
 *
 * @param string $translated          Processed text that still contains placeholders.
 * @param array  $htmlfounds          Masked fragments, in placeholder order.
 * @param array  $pre_tags_matches    Bracket tokens recorded before processing.
 * @param array  $pre_tags_matches_s  The same tokens including their surrounding whitespace.
 * @param array  $conseqMatchs        preg_match_all() result for the folded placeholder runs.
 * @return string Text with all placeholders replaced by the original fragments.
 */
function crawlomatic_restoreExcludes($translated, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs){
// Repair typical damage: a lost '[' between/before placeholders, a doubled '[ [',
// and a ', ' inserted after a closing bracket.
$translated = preg_replace ( '{]\s*?1945}', '][1945', $translated );
$translated = preg_replace ( '{ 19459(\d*?)]}', ' [19459$1]', $translated );
$translated = str_replace ( '[ [1945', '[1945', $translated );
$translated = str_replace ( '], ', ']', $translated );
// Strip commas and spaces from inside bracket tokens that contain both a '1' and a '9'.
preg_match_all ( '{\[.*?\]}', $translated, $bracket_matchs );
$bracket_matchs = $bracket_matchs [0];
foreach ( $bracket_matchs as $single_bracket )
{
if (stristr ( $single_bracket, '1' ) && stristr ( $single_bracket, '9' )) {
$single_bracket_clean = str_replace ( array (
',',
' '
), '', $single_bracket );
$translated = str_replace ( $single_bracket, $single_bracket_clean, $translated );
}
}
// When the number of numeric tokens is unchanged but their values or order differ, map
// the tokens back by position: first to temporary "[i]" indexes, then to the recorded tokens.
preg_match_all ( '{\[\d*?\]}', $translated, $post_tags_matches );
$post_tags_matches = ($post_tags_matches [0]);
if (count ( $pre_tags_matches ) == count ( $post_tags_matches ))
{
if ($pre_tags_matches !== $post_tags_matches)
{
$i = 0;
foreach ( $post_tags_matches as $post_tags_match ) {
$translated = preg_replace ( '{' . preg_quote ( trim ( $post_tags_match ) ) . '}', '[' . $i . ']', $translated, 1 );
$i ++;
}
$i = 0;
foreach ( $pre_tags_matches as $pre_tags_match ) {
$translated = str_replace ( '[' . $i . ']', $pre_tags_match, $translated );
$i ++;
}
}
}
// Remove the line breaks that the masking step put around the brackets.
$translated = str_replace ( "
[", '[', $translated );
$translated = str_replace ( "]
", ']', $translated );
// Restore the original whitespace around each token in three passes: swap the first
// occurrence of each token for "[i]", strip all whitespace around brackets, then swap
// "[i]" for the token as recorded together with its whitespace.
$i = 0;
foreach ( $pre_tags_matches_s as $pre_tags_match )
{
$pre_tags_match_h = htmlentities ( $pre_tags_match );
// NOTE(review): needle and search string below appear as a plain space here; possibly an
// HTML entity or non-breaking space in the original source - confirm.
if (stristr ( $pre_tags_match_h, ' ' )) {
$pre_tags_match = str_replace ( ' ', ' ', $pre_tags_match_h );
}
$translated = preg_replace ( '{' . preg_quote ( trim ( $pre_tags_match ) ) . '}', "[$i]", $translated, 1 );
$i ++;
}
$translated = preg_replace ( '{\s*\[}u', '[', $translated );
$translated = preg_replace ( '{\]\s*}u', ']', $translated );
$i = 0;
foreach ( $pre_tags_matches_s as $pre_tags_match )
{
$pre_tags_match_h = htmlentities ( $pre_tags_match );
if (stristr ( $pre_tags_match_h, ' ' )) {
$pre_tags_match = str_replace ( ' ', ' ', $pre_tags_match_h );
}
$translated = preg_replace ( '{' . preg_quote ( "[$i]" ) . '}', $pre_tags_match, $translated, 1 );
$i ++;
}
// Expand the folded runs back into their individual placeholders.
$startConseq = 19659001;
foreach ( $conseqMatchs [0] as $conseqMatch ) {
$translated = str_replace ( '[' . $startConseq . ']', $conseqMatch, $translated );
$startConseq ++;
}
// Final clean-up of tokens containing '19': drop spaces, dots and commas inside them.
preg_match_all ( '!\[.*?\]!', $translated, $brackets );
$brackets = $brackets [0];
$brackets = array_unique ( $brackets );
foreach ( $brackets as $bracket ) {
if (stristr ( $bracket, '19' ))
{
$corrrect_bracket = str_replace ( ' ', '', $bracket );
$corrrect_bracket = str_replace ( '.', '', $corrrect_bracket );
$corrrect_bracket = str_replace ( ',', '', $corrrect_bracket );
$translated = str_replace ( $bracket, $corrrect_bracket, $translated );
}
}
// Put the original fragments back, using the same numbering as the masking step.
$start = 19459001;
foreach ( $htmlfounds as $htmlfound ) {
$translated = str_replace ( '[' . $start . ']', $htmlfound, $translated );
$start ++;
}
return $translated;
}
/**
 * Repair artifacts that a given spinner or translator leaves in the processed text.
 *
 * - 'wordai': '-LRB-' becomes '(' and "{*|...}" groups collapse to '*'.
 * - 'spinnerchief': numeric bracket tokens with wrong brackets, stray whitespace or
 *   slashes are normalized to "[N]".
 * - 'spinrewriter' / 'translate': '& #' is re-joined to '&#' and named entities that were
 *   split after the ampersand ("& name;") are removed.
 *
 * A regex replacement that fails (preg_replace() returning null) is skipped in every
 * branch, so the content is never replaced by null.
 *
 * @param string $final_content Processed text.
 * @param string $spinner       Identifier of the service that produced the text.
 * @return string The cleaned text; unchanged for any other $spinner value.
 */
function crawlomatic_fix_spinned_content($final_content, $spinner)
{
    // Regex rules as array(pattern, replacement), applied in order.
    $rules = array();
    if ($spinner == 'wordai') {
        $final_content = str_replace('-LRB-', '(', $final_content);
        $rules[] = array("/{\*\|.*?}/", '*');
    }
    elseif ($spinner == 'spinnerchief') {
        $rules[] = array('#\[[\s\\/]*([\d]*?)[\s\\/]*\[#', '[$1]');
        $rules[] = array('#\][\s\\/]*([\d]*?)[\s\\/]*\]#', '[$1]');
        $rules[] = array('#\[[\s\\/]*([\d]*?)[\s\\/]*\]#', '[$1]');
    }
    elseif ($spinner == 'spinrewriter' || $spinner == 'translate') {
        $final_content = str_replace('& #', '&#', $final_content);
        $rules[] = array('#&\s([a-zA-Z]+?);#', '');
    }
    foreach ($rules as $rule) {
        $replaced = preg_replace($rule[0], $rule[1], $final_content);
        if ($replaced !== null) {
            $final_content = $replaced;
        }
    }
    return $final_content;
}
/**
 * Spins and/or translates a post title together with its content.
 *
 * Spinning runs first, when the 'spin_text' plugin setting is present and not
 * 'disabled' and $no_spin is not '1'. Translation runs afterwards, when
 * $translate is not 'disabled'. Around each remote call the content is passed
 * through crawlomatic_replaceExcludes() / crawlomatic_restoreExcludes(); the
 * 'turkcespin' and 'aiseo' spinners are the exception and receive the content
 * as-is.
 *
 * When a step fails, the text from before that step is kept and, if the
 * 'enable_detailed_logging' setting exists, the failure is logged.
 * When the 'no_title_spin' setting is 'on', the title is never replaced.
 *
 * @param string $post_title       Title to process.
 * @param string $final_content    Content to process.
 * @param string $translate        Target language, or 'disabled' to skip translation.
 * @param string $source_lang      Source language; '' or 'disabled' means 'auto'.
 * @param string $use_proxy        Forwarded unchanged to crawlomatic_translate().
 * @param string $no_spin          '1' skips the spinning step.
 * @param string $second_translate Optional second target language ('' or 'disabled' to skip);
 *                                 the first translation is translated again from $translate to it.
 * @return array Two elements: [0] the resulting title, [1] the resulting content.
 */
function crawlomatic_spin_and_translate($post_title, $final_content, $translate, $source_lang, $use_proxy = '1', $no_spin = '0', $second_translate = 'disabled')
{
    $translation        = false;
    $pre_tags_matches   = array();
    $pre_tags_matches_s = array();
    $conseqMatchs       = array();
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    $log_details = isset($crawlomatic_Main_Settings['enable_detailed_logging']);
    $keep_title  = isset($crawlomatic_Main_Settings['no_title_spin']) && $crawlomatic_Main_Settings['no_title_spin'] == 'on';

    // ---- Step 1: spinning ----
    if (isset($crawlomatic_Main_Settings['spin_text']) && $crawlomatic_Main_Settings['spin_text'] !== 'disabled' && $no_spin != '1') {
        $spinner = $crawlomatic_Main_Settings['spin_text'];
        // These two services get the raw content, without exclude placeholders.
        $turk = ($spinner == 'turkcespin' || $spinner == 'aiseo');
        $htmlfounds = array();
        if ($turk == false) {
            $final_content = crawlomatic_replaceExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
        }
        if ($spinner == 'builtin') {
            $translation = crawlomatic_builtin_spin_text($post_title, $final_content);
        } elseif ($spinner == 'wikisynonyms') {
            $translation = crawlomatic_spin_text($post_title, $final_content, false);
        } elseif ($spinner == 'freethesaurus') {
            $translation = crawlomatic_spin_text($post_title, $final_content, true);
        } elseif ($spinner == 'best') {
            $translation = crawlomatic_best_spin_text($post_title, $final_content);
        } elseif ($spinner == 'wordai') {
            $translation = crawlomatic_wordai_spin_text($post_title, $final_content);
        } elseif ($spinner == 'tldr') {
            $translation = crawlomatic_summarize_content($post_title, $final_content);
        } elseif ($spinner == 'spinrewriter') {
            if (isset($crawlomatic_Main_Settings['confidence_level']) && $crawlomatic_Main_Settings['confidence_level'] != '') {
                $confidence = $crawlomatic_Main_Settings['confidence_level'];
            } else {
                $confidence = 'medium';
            }
            $translation = crawlomatic_spinrewriter_spin_text($post_title, $final_content, $confidence);
        } elseif ($spinner == 'turkcespin') {
            $translation = crawlomatic_turkcespin_spin_text($post_title, $final_content);
        } elseif ($spinner == 'aiseo') {
            $translation = crawlomatic_aiseo_spin_text($post_title, $final_content);
        } elseif ($spinner == 'spinnerchief') {
            $translation = crawlomatic_spinnerchief_spin_text($post_title, $final_content);
        }
        if (is_array($translation) && isset($translation[0]) && isset($translation[1])) {
            if (!$keep_title) {
                $post_title = $translation[0];
            }
            $final_content = $translation[1];
            if ($turk == false) {
                $final_content = crawlomatic_fix_spinned_content($final_content, $spinner);
                $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            }
        } else {
            // Spinning failed: put the excluded fragments back into the unspun text.
            if ($turk == false) {
                $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            }
            if ($log_details) {
                if ($translation !== FALSE) {
                    crawlomatic_log_to_file('Text Spinning failed - malformed data ' . $spinner);
                } else {
                    crawlomatic_log_to_file('Text Spinning Failed - returned false ' . $spinner);
                }
            }
        }
    }

    // ---- Step 2: translation ----
    if ($translate != 'disabled') {
        if (isset($source_lang) && $source_lang != 'disabled' && $source_lang != '') {
            $tr = $source_lang;
        } else {
            $tr = 'auto';
        }
        $htmlfounds = array();
        $final_content = crawlomatic_replaceExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
        $translation = crawlomatic_translate($post_title, $final_content, $tr, $translate, $use_proxy);
        if (is_array($translation) && isset($translation[1])) {
            if ($second_translate != '' && $second_translate != 'disabled') {
                $translation = crawlomatic_translate($translation[0], $translation[1], $translate, $second_translate, $use_proxy);
                if (is_array($translation) && isset($translation[1])) {
                    $translation[1] = crawlomatic_restoreExcludes($translation[1], $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                } else {
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                    $translation = false;
                    if ($log_details) {
                        crawlomatic_log_to_file('Failed to translate text the second time, from ' . $translate . ' to ' . $second_translate);
                    }
                }
            } else {
                $translation[1] = crawlomatic_restoreExcludes($translation[1], $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            }
        } else {
            $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            if ($log_details) {
                crawlomatic_log_to_file('Failed to translate text, from ' . $tr . ' to ' . $translate);
            }
        }
        if ($translation !== FALSE) {
            if (is_array($translation) && isset($translation[0]) && isset($translation[1])) {
                if (!$keep_title) {
                    $post_title = $translation[0];
                }
                $final_content = $translation[1];
                $final_content = str_replace('</ iframe>', '</iframe>', $final_content);
                if (stristr($final_content, '<head>') !== false) {
                    // A full HTML document came back: keep only the children of <body>.
                    $d = new DOMDocument;
                    $mock = new DOMDocument;
                    $internalErrors = libxml_use_internal_errors(true);
                    $d->loadHTML('<?xml encoding="utf-8" ?>' . $final_content);
                    libxml_use_internal_errors($internalErrors);
                    $body = $d->getElementsByTagName('body')->item(0);
                    // item(0) is null when the parser produced no <body>; without this
                    // guard the content would be replaced by an empty document.
                    if ($body !== null) {
                        foreach ($body->childNodes as $child) {
                            $mock->appendChild($mock->importNode($child, true));
                        }
                        $new_post_content_temp = $mock->saveHTML();
                        if ($new_post_content_temp !== '' && $new_post_content_temp !== false) {
                            $new_post_content_temp = str_replace('<?xml encoding="utf-8" ?>', '', $new_post_content_temp);
                            $final_contentx = preg_replace("/_addload\(function\(\){([^<]*)/i", "", $new_post_content_temp);
                            if ($final_contentx !== null) {
                                $final_content = $final_contentx;
                            }
                        }
                    }
                }
                // Undo spacing the translator inserted into tags.
                $final_content = htmlspecialchars_decode($final_content);
                $final_content = str_replace('</ ', '</', $final_content);
                $final_content = str_replace(' />', '/>', $final_content);
                $final_content = str_replace('< br/>', '<br/>', $final_content);
                $final_content = str_replace('< / ', '</', $final_content);
                $final_content = str_replace(' / >', '/>', $final_content);
                // NOTE(review): as written this class matches only '-', so every hyphen is
                // removed; it looks like an escape-sequence range was lost - confirm against
                // the original plugin source.
                $final_content1 = preg_replace('/[-]/u', '', $final_content);
                if ($final_content1 !== null) {
                    $final_content = $final_content1;
                }
                $final_content = html_entity_decode($final_content);
                $src_fixed = preg_replace_callback("#src(?:\s)?=(?:\s)?[\'\"]([^\"\']+?)[\'\"]#", "crawlomatic_removeSpaces", $final_content);
                // Same null guard as the other regex calls: keep the text on a PCRE error.
                if ($src_fixed !== null) {
                    $final_content = $src_fixed;
                }
                if (!$keep_title) {
                    // Re-join numeric entities the translator split apart ("& # 39 ;").
                    $post_title1 = preg_replace('{&\s*#\s*(\d+)\s*;}', '&#$1;', $post_title);
                    if ($post_title1 !== null) {
                        $post_title = $post_title1;
                    }
                    $post_title = htmlspecialchars_decode($post_title);
                    $post_title = str_replace('</ ', '</', $post_title);
                    $post_title = str_replace(' />', '/>', $post_title);
                    // NOTE(review): same hyphen-only character class as above - confirm.
                    $post_title1 = preg_replace('/[-]/u', '', $post_title);
                    if ($post_title1 !== null) {
                        $post_title = $post_title1;
                    }
                }
            } else {
                // Content came back without a usable title. In that path only
                // $translation[1] had its excludes restored, so $final_content still
                // carries the placeholders - restore them before returning it.
                if (is_array($translation) && isset($translation[1])) {
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                }
                if ($log_details) {
                    crawlomatic_log_to_file('Translation failed - malformed data!');
                }
            }
        } else {
            if ($log_details) {
                crawlomatic_log_to_file('Translation Failed - returned false!');
            }
        }
    }
    return array(
        $post_title,
        $final_content
    );
}
/**
 * Spins and/or translates a piece of shortcode content (no title involved).
 *
 * Spinning runs when $spin is not ''. $spin may have the form
 * 'name', 'name:user' or 'name:user:pass', where name is one of bestspinner,
 * wordai, spinrewriter, turkcespin, aiseo, builtin, wikisynonyms or
 * freethesaurus; any other value falls back to the 'spin_text' plugin setting.
 * Translation runs afterwards when $translate is neither '' nor 'disabled'.
 *
 * Because there is no title, the placeholder 'hello' is handed to the
 * spinner / translator functions in its place and the returned title is ignored.
 * When a step fails, the text from before that step is kept and, if the
 * 'enable_detailed_logging' setting exists, the failure is logged.
 *
 * @param string $final_content    Content to process.
 * @param string $spin             Spinner selector (see above), '' to skip spinning.
 * @param string $translate        Target language; '' or 'disabled' skips translation.
 * @param string $source_lang      Source language; '' or 'disabled' means 'auto'.
 * @param string $use_proxy        Forwarded unchanged to crawlomatic_translate().
 * @param string $second_translate Optional second target language ('' or 'disabled' to skip).
 * @return string The processed content.
 */
function crawlomatic_spin_and_translate_shortcode($final_content, $spin, $translate, $source_lang, $use_proxy = '1', $second_translate = 'disabled')
{
    $pre_tags_matches   = array();
    $pre_tags_matches_s = array();
    $conseqMatchs       = array();
    // Must start as false: when no spinner branch matches, the failure path below reads it.
    $translation        = false;
    // Loaded unconditionally so the translation step can log even when $spin is ''.
    $crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
    if (!is_array($crawlomatic_Main_Settings)) {
        // Settings were never saved; use an empty array so 'spin_text' can be assigned below.
        $crawlomatic_Main_Settings = array();
    }
    $log_details = isset($crawlomatic_Main_Settings['enable_detailed_logging']);

    // ---- Step 1: spinning ----
    if ($spin != '') {
        $user_name = '';
        $pass = '';
        $spin_prts = explode(':', $spin);
        $spinner_prefix = trim($spin_prts[0]);
        $known_prefixes = array('bestspinner', 'wordai', 'spinrewriter', 'turkcespin', 'aiseo', 'builtin', 'wikisynonyms', 'freethesaurus');
        if (in_array($spinner_prefix, $known_prefixes, true)) {
            // The dispatch below identifies The Best Spinner as 'best', so map the
            // shortcode name onto it; otherwise 'bestspinner' would match no branch.
            $crawlomatic_Main_Settings['spin_text'] = ($spinner_prefix === 'bestspinner') ? 'best' : $spinner_prefix;
            if (isset($spin_prts[1])) {
                $user_name = trim($spin_prts[1]);
                if (isset($spin_prts[2])) {
                    $pass = trim($spin_prts[2]);
                }
            }
        }
        if (isset($crawlomatic_Main_Settings['spin_text']) && $crawlomatic_Main_Settings['spin_text'] !== 'disabled') {
            $spinner = $crawlomatic_Main_Settings['spin_text'];
            // These two services get the raw content, without exclude placeholders.
            $turk = ($spinner == 'turkcespin' || $spinner == 'aiseo');
            $htmlfounds = array();
            if ($turk == false) {
                $final_content = crawlomatic_replaceExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            }
            if ($spinner == 'builtin') {
                $translation = crawlomatic_builtin_spin_text('hello', $final_content);
            } elseif ($spinner == 'wikisynonyms') {
                $translation = crawlomatic_spin_text('hello', $final_content, false);
            } elseif ($spinner == 'freethesaurus') {
                $translation = crawlomatic_spin_text('hello', $final_content, true);
            } elseif ($spinner == 'best') {
                $translation = crawlomatic_best_spin_text('hello', $final_content, $user_name, $pass);
            } elseif ($spinner == 'wordai') {
                $translation = crawlomatic_wordai_spin_text('hello', $final_content, $user_name, $pass);
            } elseif ($spinner == 'tldr') {
                // There is no $post_title in this function; use the same placeholder as the other branches.
                $translation = crawlomatic_summarize_content('hello', $final_content);
            } elseif ($spinner == 'spinrewriter') {
                if (isset($crawlomatic_Main_Settings['confidence_level']) && $crawlomatic_Main_Settings['confidence_level'] != '') {
                    $confidence = $crawlomatic_Main_Settings['confidence_level'];
                } else {
                    $confidence = 'medium';
                }
                $translation = crawlomatic_spinrewriter_spin_text('hello', $final_content, $confidence);
            } elseif ($spinner == 'turkcespin') {
                $translation = crawlomatic_turkcespin_spin_text('hello', $final_content, $user_name);
            } elseif ($spinner == 'aiseo') {
                $translation = crawlomatic_aiseo_spin_text('hello', $final_content, $user_name);
            }
            if (is_array($translation) && isset($translation[0]) && isset($translation[1])) {
                $final_content = $translation[1];
                if ($turk == false) {
                    $final_content = crawlomatic_fix_spinned_content($final_content, $spinner);
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                }
            } else {
                // Spinning failed: put the excluded fragments back into the unspun text.
                if ($turk == false) {
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                }
                if ($log_details) {
                    if ($translation !== FALSE) {
                        crawlomatic_log_to_file('Shortcode Text Spinning failed - malformed data ' . $spinner);
                    } else {
                        crawlomatic_log_to_file('Shortcode Text Spinning Failed - returned false ' . $spinner);
                    }
                }
            }
        }
    }

    // ---- Step 2: translation ----
    if ($translate != 'disabled' && $translate != '') {
        if (isset($source_lang) && $source_lang != 'disabled' && $source_lang != '') {
            $tr = $source_lang;
        } else {
            $tr = 'auto';
        }
        $htmlfounds = array();
        $final_content = crawlomatic_replaceExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
        $translation = crawlomatic_translate('hello', $final_content, $tr, $translate, $use_proxy);
        if (is_array($translation) && isset($translation[1])) {
            if ($second_translate != '' && $second_translate != 'disabled') {
                $translation = crawlomatic_translate($translation[0], $translation[1], $translate, $second_translate, $use_proxy);
                if (is_array($translation) && isset($translation[1])) {
                    $translation[1] = crawlomatic_restoreExcludes($translation[1], $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                } else {
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                    $translation = false;
                    if ($log_details) {
                        crawlomatic_log_to_file('Failed to translate shortcode text the second time, from ' . $translate . ' to ' . $second_translate);
                    }
                }
            } else {
                $translation[1] = crawlomatic_restoreExcludes($translation[1], $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            }
        } else {
            $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
            if ($log_details) {
                crawlomatic_log_to_file('Failed to translate shortcode text, from ' . $tr . ' to ' . $translate);
            }
        }
        if ($translation !== FALSE) {
            if (is_array($translation) && isset($translation[0]) && isset($translation[1])) {
                $final_content = $translation[1];
                $final_content = str_replace('</ iframe>', '</iframe>', $final_content);
                if (stristr($final_content, '<head>') !== false) {
                    // A full HTML document came back: keep only the children of <body>.
                    $d = new DOMDocument;
                    $mock = new DOMDocument;
                    $internalErrors = libxml_use_internal_errors(true);
                    $d->loadHTML('<?xml encoding="utf-8" ?>' . $final_content);
                    libxml_use_internal_errors($internalErrors);
                    $body = $d->getElementsByTagName('body')->item(0);
                    // item(0) is null when the parser produced no <body>; without this
                    // guard the content would be replaced by an empty document.
                    if ($body !== null) {
                        foreach ($body->childNodes as $child) {
                            $mock->appendChild($mock->importNode($child, true));
                        }
                        $new_post_content_temp = $mock->saveHTML();
                        if ($new_post_content_temp !== '' && $new_post_content_temp !== false) {
                            $new_post_content_temp = str_replace('<?xml encoding="utf-8" ?>', '', $new_post_content_temp);
                            $final_content1 = preg_replace("/_addload\(function\(\){([^<]*)/i", "", $new_post_content_temp);
                            if ($final_content1 !== null) {
                                $final_content = $final_content1;
                            }
                        }
                    }
                }
                // Undo spacing the translator inserted into tags.
                $final_content = htmlspecialchars_decode($final_content);
                $final_content = str_replace('</ ', '</', $final_content);
                $final_content = str_replace(' />', '/>', $final_content);
                $final_content = str_replace('< br/>', '<br/>', $final_content);
                $final_content = str_replace('< / ', '</', $final_content);
                $final_content = str_replace(' / >', '/>', $final_content);
                // NOTE(review): as written this class matches only '-', so every hyphen is
                // removed; it looks like an escape-sequence range was lost - confirm against
                // the original plugin source.
                $final_content1 = preg_replace('/[-]/u', '', $final_content);
                if ($final_content1 !== null) {
                    $final_content = $final_content1;
                }
                $final_content = html_entity_decode($final_content);
                $src_fixed = preg_replace_callback("#src(?:\s)?=(?:\s)?[\'\"]([^\"\']+?)[\'\"]#", "crawlomatic_removeSpaces", $final_content);
                // Same null guard as the other regex calls: keep the text on a PCRE error.
                if ($src_fixed !== null) {
                    $final_content = $src_fixed;
                }
            } else {
                // Content came back without a usable title. In that path only
                // $translation[1] had its excludes restored, so $final_content still
                // carries the placeholders - restore them before returning it.
                if (is_array($translation) && isset($translation[1])) {
                    $final_content = crawlomatic_restoreExcludes($final_content, $htmlfounds, $pre_tags_matches, $pre_tags_matches_s, $conseqMatchs);
                }
                if ($log_details) {
                    crawlomatic_log_to_file('Shortcode Translation failed - malformed data!');
                }
            }
        } else {
            if ($log_details) {
                crawlomatic_log_to_file('Shortcode Translation Failed - returned false!');
            }
        }
    }
    return $final_content;
}
function crawlomatic_translate($title, $content, $from, $to, $use_proxy = '1')
{
$crawlomatic_Main_Settings = get_option('crawlomatic_Main_Settings', false);
$ch = FALSE;
try {
if($from == 'disabled')
{
if(strstr($to, '-') !== false && $to != 'zh-CN' && $to != 'zh-TW')
{
$from = 'auto-';
}
else
{
$from = 'auto';
}
}
if($from != 'en' && $from != 'en-' && $from == $to)
{
if(strstr($to, '-') !== false && $to != 'zh-CN' && $to != 'zh-TW')
{
$from = 'en-';
}
else
{
$from = 'en';
}
}
elseif(($from == 'en' || $from == 'en-') && $from == $to)
{
return false;
}
if(strstr($to, '!') !== false)
{
if (!isset($crawlomatic_Main_Settings['bing_auth']) || trim($crawlomatic_Main_Settings['bing_auth']) == '')
{
throw new Exception('You must enter a Microsoft Translator API key from plugin settings, to use this feature!');
}
require_once (dirname(__FILE__) . "/res/crawlomatic-translator-microsoft.php");
$options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_CONNECTTIMEOUT => 10,
CURLOPT_TIMEOUT => 60,
CURLOPT_MAXREDIRS => 10,
CURLOPT_SSL_VERIFYHOST => 0,
CURLOPT_SSL_VERIFYPEER => 0
);
$ch = curl_init();
if ($ch === FALSE) {
crawlomatic_log_to_file ('Failed to init curl in Microsoft Translator');
return false;
}
if ($use_proxy && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled') {
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
$options[CURLOPT_PROXY] = trim($prx[$randomness]);
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
$options[CURLOPT_PROXYUSERPWD] = trim($prx_auth[$randomness]);
}
}
}
curl_setopt_array($ch, $options);
$MicrosoftTranslator = new MicrosoftTranslator ( $ch );
try
{
if (!isset($crawlomatic_Main_Settings['bing_region']) || trim($crawlomatic_Main_Settings['bing_region']) == '')
{
$mt_region = 'global';
}
else
{
$mt_region = trim($crawlomatic_Main_Settings['bing_region']);
}
if($from == 'auto' || $from == 'auto-' || $from == 'disabled')
{
$from = 'no';
}
$accessToken = $MicrosoftTranslator->getToken ( trim($crawlomatic_Main_Settings['bing_auth']) , $mt_region );
$from = trim($from, '!');
$to = trim($to, '!');
$translated = $MicrosoftTranslator->translateWrap ( $content, $from, $to );
$translated_title = $MicrosoftTranslator->translateWrap ( $title, $from, $to );
curl_close($ch);
}
catch ( Exception $e )
{
curl_close($ch);
crawlomatic_log_to_file ('Microsoft Translation error: ' . $e->getMessage());
return false;
}
}
elseif(strstr($to, '-') !== false && $to != 'zh-CN' && $to != 'zh-TW')
{
if (!isset($crawlomatic_Main_Settings['deepl_auth']) || trim($crawlomatic_Main_Settings['deepl_auth']) == '')
{
throw new Exception('You must enter a DeepL API key from plugin settings, to use this feature!');
}
$to = rtrim($to, '-');
$from = rtrim($from, '-');
if(strlen($content) > 13000)
{
$translated = '';
while($content != '')
{
$first30k = substr($content, 0, 13000);
$content = substr($content, 13000);
if (isset($crawlomatic_Main_Settings['deppl_free']) && trim($crawlomatic_Main_Settings['deppl_free']) == 'on')
{
$deepapi = 'https://api-free.deepl.com/v2/translate';
}
else
{
$deepapi = 'https://api.deepl.com/v2/translate';
}
$ch = curl_init($deepapi);
if($ch !== false)
{
$data = array();
$data['text'] = $first30k;
if($from != 'auto')
{
$data['source_lang'] = $from;
}
$data['tag_handling'] = 'xml';
$data['non_splitting_tags'] = 'div';
$data['preserve_formatting'] = '1';
$data['target_lang'] = $to;
$data['auth_key'] = trim($crawlomatic_Main_Settings['deepl_auth']);
$fdata = "";
foreach ($data as $key => $val) {
$fdata .= "$key=" . urlencode(trim($val)) . "&";
}
$headers = [
'Content-Type: application/x-www-form-urlencoded',
'Content-Length: ' . strlen($fdata)
];
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERAGENT, crawlomatic_get_random_user_agent());
curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$translated_temp = curl_exec($ch);
if($translated_temp === false)
{
throw new Exception('Failed to post to DeepL: ' . curl_error($ch));
}
curl_close($ch);
}
$trans_json = json_decode($translated_temp, true);
if($trans_json === false)
{
throw new Exception('Incorrect multipart response from DeepL: ' . $translated_temp);
}
if(!isset($trans_json['translations'][0]['text']))
{
throw new Exception('Unrecognized multipart response from DeepL: ' . $translated_temp);
}
$translated .= ' ' . $trans_json['translations'][0]['text'];
}
}
else
{
if (isset($crawlomatic_Main_Settings['deppl_free']) && trim($crawlomatic_Main_Settings['deppl_free']) == 'on')
{
$deepapi = 'https://api-free.deepl.com/v2/translate';
}
else
{
$deepapi = 'https://api.deepl.com/v2/translate';
}
$ch = curl_init($deepapi);
if($ch !== false)
{
$data = array();
$data['text'] = $content;
if($from != 'auto')
{
$data['source_lang'] = $from;
}
$data['tag_handling'] = 'xml';
$data['non_splitting_tags'] = 'div';
$data['preserve_formatting'] = '1';
$data['target_lang'] = $to;
$data['auth_key'] = trim($crawlomatic_Main_Settings['deepl_auth']);
$fdata = "";
foreach ($data as $key => $val) {
$fdata .= "$key=" . urlencode(trim($val)) . "&";
}
curl_setopt($ch, CURLOPT_POST, 1);
$headers = [
'Content-Type: application/x-www-form-urlencoded',
'Content-Length: ' . strlen($fdata)
];
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_POSTFIELDS, $fdata);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$translated = curl_exec($ch);
if($translated === false)
{
throw new Exception('Failed to post to DeepL: ' . curl_error($ch));
}
curl_close($ch);
}
$trans_json = json_decode($translated, true);
if($trans_json === false)
{
throw new Exception('Incorrect text response from DeepL: ' . $translated);
}
if(!isset($trans_json['translations'][0]['text']))
{
throw new Exception('Unrecognized text response from DeepL: ' . $translated);
}
$translated = $trans_json['translations'][0]['text'];
}
$translated = str_replace('<strong>', ' <strong>', $translated);
$translated = str_replace('</strong>', '</strong> ', $translated);
if($from != 'auto')
{
$from_from = '&source_lang=' . $from;
}
else
{
$from_from = '';
}
if (isset($crawlomatic_Main_Settings['deppl_free']) && trim($crawlomatic_Main_Settings['deppl_free']) == 'on')
{
$deepapi = 'https://api-free.deepl.com/v2/translate?text=';
}
else
{
$deepapi = 'https://api.deepl.com/v2/translate?text=';
}
$translated_title = crawlomatic_get_web_page($deepapi . urlencode($title) . $from_from . '&target_lang=' . $to . '&auth_key=' . trim($crawlomatic_Main_Settings['deepl_auth']) . '&tag_handling=xml&preserve_formatting=1', '', '', '0', '', '', '', '');
$trans_json = json_decode($translated_title, true);
if($trans_json === false)
{
throw new Exception('Incorrect title response from DeepL: ' . $translated_title);
}
if(!isset($trans_json['translations'][0]['text']))
{
throw new Exception('Unrecognized title response from DeepL: ' . $translated_title);
}
$translated_title = $trans_json['translations'][0]['text'];
}
else
{
if (isset($crawlomatic_Main_Settings['google_trans_auth']) && trim($crawlomatic_Main_Settings['google_trans_auth']) != '')
{
require_once(dirname(__FILE__) . "/res/translator-api.php");
$ch = curl_init();
if ($ch === FALSE) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to init cURL in translator!');
}
return false;
}
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{
$prx = explode(',', $crawlomatic_Main_Settings['proxy_url']);
$randomness = array_rand($prx);
curl_setopt( $ch, CURLOPT_PROXY, trim($prx[$randomness]));
if (isset($crawlomatic_Main_Settings['proxy_auth']) && $crawlomatic_Main_Settings['proxy_auth'] != '')
{
$prx_auth = explode(',', $crawlomatic_Main_Settings['proxy_auth']);
if(isset($prx_auth[$randomness]) && trim($prx_auth[$randomness]) != '')
{
curl_setopt( $ch, CURLOPT_PROXYUSERPWD, trim($prx_auth[$randomness]) );
}
}
}
$timeout = 60;
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
$GoogleTranslatorAPI = new GoogleTranslatorAPI($ch, $crawlomatic_Main_Settings['google_trans_auth']);
$translated = '';
$translated_title = '';
if($content != '')
{
if(strlen($content) > 13000)
{
while($content != '')
{
$first30k = substr($content, 0, 13000);
$content = substr($content, 13000);
$translated_temp = $GoogleTranslatorAPI->translateText($first30k, $from, $to);
$translated .= ' ' . $translated_temp;
}
}
else
{
$translated = $GoogleTranslatorAPI->translateText($content, $from, $to);
}
}
if($title != '')
{
$translated_title = $GoogleTranslatorAPI->translateText($title, $from, $to);
}
curl_close($ch);
}
else
{
require_once(dirname(__FILE__) . "/res/crawlomatic-translator.php");
$ch = curl_init();
if ($ch === FALSE) {
if (isset($crawlomatic_Main_Settings['enable_detailed_logging'])) {
crawlomatic_log_to_file('Failed to init cURL in translator!');
}
return false;
}
if ($use_proxy == '1' && isset($crawlomatic_Main_Settings['proxy_url']) && $crawlomatic_Main_Settings['proxy_url'] != '' && $crawlomatic_Main_Settings['proxy_url'] != 'disable' && $crawlomatic_Main_Settings['proxy_url'] != 'disabled')
{