add parser arriah.ru

This commit is contained in:
2022-08-18 01:49:10 +03:00
parent cb7c116251
commit a13b193d5b
3 changed files with 221 additions and 0 deletions

127
arriah.php Normal file
View File

@@ -0,0 +1,127 @@
<?php
// Load the WordPress bootstrap file; arriah_parser() below relies on $wpdb and
// on wp_* functions such as wp_insert_post().
include_once '/vhosts/beta/wp-load.php';
// Run the feed import as soon as this script is executed.
arriah_parser();
/**
 * Imports the items of the arriah.ru RSS feed as WordPress posts.
 *
 * Every <item> whose link contains a path segment is inserted as a published
 * post in category 37, unless a post with the same slug and publication date
 * is already stored. Relative "/local/" image sources in the description are
 * rewritten to absolute URLs and every "<br>" is doubled.
 *
 * The duplicate lookup uses $wpdb->prepare() and $wpdb->posts instead of a
 * hand-built query against a hard-coded "wp_posts" table, because the slug is
 * derived from remote feed content.
 *
 * @global wpdb $wpdb WordPress database access object.
 *
 * @return bool True when the feed was processed, false when it could not be
 *              downloaded or parsed.
 */
function arriah_parser(){
    global $wpdb;

    $feed = 'https://new.arriah.ru/rss.php';

    // NOTE(review): certificate verification is switched off, so the feed can
    // be altered in transit. Left unchanged because enabling it may break the
    // download if the host's certificate does not validate - please confirm.
    $arrContextOptions = [
        "ssl" => [
            "verify_peer" => false,
            "verify_peer_name" => false,
        ],
    ];

    $raw = file_get_contents($feed, false, stream_context_create($arrContextOptions));
    if ($raw === false || $raw === '') {
        // Download failed: nothing to parse.
        return false;
    }

    $xml = simplexml_load_string($raw);
    if ($xml === false) {
        // The response was not well-formed XML.
        return false;
    }

    $items = $xml->xpath('//item');
    if (!is_array($items)) {
        // xpath() reports an evaluation error with a non-array value.
        return false;
    }

    foreach ($items as $item) {
        // Skip items whose link has no "/" past the first character, i.e. no
        // trailing path segment to identify the article.
        $link = trim((string) $item->link, '/');
        if (!strrpos($link, '/')) {
            continue;
        }

        // An unparseable pubDate would otherwise be stored as 1970-01-01.
        $timestamp = strtotime((string) $item->pubDate);
        if ($timestamp === false) {
            continue;
        }
        $mysqldate = date('Y-m-d H:i:s', $timestamp);

        $title = wp_strip_all_tags((string) $item->title);

        $text = (string) $item->description;
        $text = str_replace('src="/local/', 'src="https://new.arriah.ru/local/', $text);
        $text = str_replace('<br>', '<br><br>', $text);

        $slug = express_slug(vij_slug($title));

        // A post with the same slug and date means this item was imported before.
        $existing_id = $wpdb->get_var(
            $wpdb->prepare(
                "SELECT `ID` FROM `{$wpdb->posts}` WHERE `post_name` = %s AND `post_date` = %s LIMIT 1",
                $slug,
                $mysqldate
            )
        );
        if ($existing_id) {
            continue;
        }

        $my_post = [
            'post_title' => $title,
            'post_name' => $slug,
            'post_type' => 'post',
            'post_content' => $text,
            'post_date' => $mysqldate,
            'post_status' => 'publish',
            'post_author' => 1,
            'comment_status' => 'closed',
            'post_category' => [ 37 ],
        ];
        wp_insert_post($my_post);
    }

    return true;
}
/**
 * Downloads a URL with cURL and hands back the response body.
 *
 * Redirects are followed with the Referer header filled in automatically, and
 * response headers are left out of the returned data.
 *
 * @param string $url Address to fetch.
 *
 * @return string|bool Whatever curl_exec() returns: the body on success,
 *                     false when the transfer fails.
 */
function file_get_contents_curl( $url ) {
    $handle = curl_init();

    // Same options, same order, applied in a single call.
    curl_setopt_array( $handle, [
        CURLOPT_AUTOREFERER    => TRUE,
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_URL            => $url,
        CURLOPT_FOLLOWLOCATION => TRUE,
    ] );

    $response = curl_exec( $handle );
    curl_close( $handle );

    return $response;
}
/**
 * Converts runs of consecutive <br> tags into paragraph breaks.
 *
 * Two or more <br> tags in a row (in any letter case, with or without a
 * self-closing slash, optionally separated by whitespace) are replaced by
 * "</p><p>", and the whole result is wrapped in one <p>...</p> pair.
 *
 * @param string $html HTML fragment to convert.
 *
 * @return string The fragment wrapped in paragraph tags.
 */
function replace_br($html) {
    // "i" modifier: HTML tag names are case-insensitive, so <BR> counts too.
    $converted = preg_replace('#(?:<br\s*/?>\s*?){2,}#i', '</p><p>', $html);
    if ($converted === null) {
        // preg_replace() returns null on an engine error; keep the input
        // rather than emitting an empty paragraph.
        $converted = $html;
    }
    return "<p>$converted</p>";
}
/**
 * Removes empty paragraph elements from an HTML fragment.
 *
 * Only real <p> elements are matched (with or without attributes, in any
 * letter case). Other elements whose name merely starts with "p", such as
 * <pre> or <path>, are left untouched.
 *
 * @param string $html HTML fragment to clean.
 *
 * @return string The fragment without empty <p></p> pairs.
 */
function delete_p($html){
    // After "<p" require either ">" or whitespace + attributes, so that tag
    // names like "pre" or "path" do not match; the closing tag must be "</p>".
    $pattern = '#<p(?:\s[^>]*)?></p\s*>#i';
    $cleaned = preg_replace($pattern, '', $html);
    // preg_replace() returns null on an engine error; fall back to the input.
    return $cleaned === null ? $html : $cleaned;
}