sources url checking

Added some preliminary URL validation to make sure a source URL is
still active.

The consolidation process is starting to get top-heavy, so this is a
stopgap until a more efficient process can be created.
This commit is contained in:
ro 2024-02-12 14:51:28 -06:00
parent f1995d2163
commit e426eb2735

View file

@ -52,19 +52,45 @@ class LocationController extends Controller
{ {
$duplicates = 0; $duplicates = 0;
$fresh = 0; $fresh = 0;
$missing = [];
$unified = []; $unified = [];
$sources = Source::where("active", true)->get(); $cleanSources = [];
$sources = Source::where("active", true)->get();
//checks source url to make sure they valid
foreach ($sources as $source) { foreach ($sources as $source) {
//parsing for mastodon
if ($source->type == 'mastodon') { if ($source->type == 'mastodon') {
$url = 'https://' . $source->url;
} else {
$url = $source->url;
}
if ($this->urlExists($url)) {
array_push($cleanSources, [
'url' => $source->url,
'token' => $source->token,
'type' => $source->type,
'format' => $source->format]);
} else {
var_dump($url);
array_push($missing, ['source' => $url]);
}
}
//valid source url get compiled for unified
foreach ($cleanSources as $source) {
//check url to make sure it's cool
//parsing for mastodon
if ($source['type'] == 'mastodon') {
$result = []; $result = [];
if ($source->token == null) { if ($source['token'] == null) {
$result = \Mastodon::domain('https://' . $source->url) $result = \Mastodon::domain('https://' . $source['url'])
->get('/instance/domain_blocks'); ->get('/instance/domain_blocks');
} else { } else {
$result = \Mastodon::domain('https://' . $source->url) $result = \Mastodon::domain('https://' . $source['url'])
->token($source->token) ->token($source['token'])
->get('/instance/domain_blocks'); ->get('/instance/domain_blocks');
} }
@ -97,8 +123,8 @@ class LocationController extends Controller
} }
} }
//parsing for custom csv //parsing for custom csv
if ($source->type == 'custom' && $source->format == 'csv') { if ($source['type'] == 'custom' && $source['format'] == 'csv') {
$denylist = array_map('str_getcsv', file($source->url)); $denylist = array_map('str_getcsv', file($source['url']));
foreach ($denylist as $item) { foreach ($denylist as $item) {
$index = array_search($item[0], array_column($unified, 'url')); $index = array_search($item[0], array_column($unified, 'url'));
if ($index) { if ($index) {
@ -212,6 +238,26 @@ class LocationController extends Controller
} }
} }
//TODO: Send update post to TBS social account //TODO: Send update post to TBS social account
return back()->with('message', $duplicates . ' UPDATED - ' . $fresh . ' CREATED');
return back()->with('message', $duplicates . ' UPDATED - ' . $fresh . ' CREATED - ' . count($missing) . ' SOURCE(S) NOT CHECKED');
}
/**
 * Check that a URL is a well-formed http/https URL and that requesting it
 * does not end in a "not found" response.
 *
 * The URL is sanitized, validated, and restricted to the http/https schemes
 * before any request is made. The headers are then fetched with
 * get_headers(), and the status code of the last response in the list is
 * inspected, so a redirect that lands on a missing page is reported as
 * missing regardless of protocol version or reason phrase.
 *
 * @param mixed $url URL to check; anything that does not sanitize and
 *                   validate as a URL yields false.
 *
 * @return bool False when the URL is invalid, is not http/https, cannot be
 *              fetched, or the final response is 404 or 410; true otherwise.
 */
public function urlExists($url)
{
    // Remove all characters that are illegal in a URL before validating it.
    $url = filter_var($url, FILTER_SANITIZE_URL);

    if ($url === false || filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    // Only http/https URLs are accepted for the header check below.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if (!in_array($scheme, ['http', 'https'], true)) {
        return false;
    }

    // get_headers() emits a warning when the request fails; the failure is
    // handled through its false return value, so the warning is suppressed.
    $headers = @get_headers($url);
    if (!is_array($headers) || $headers === []) {
        return false;
    }

    // When redirects are followed, the list contains one status line per
    // response. Keep the last one: it belongs to the final response.
    $status = null;
    foreach ($headers as $header) {
        if (is_string($header) && preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#i', $header, $match) === 1) {
            $status = (int) $match[1];
        }
    }

    // 404 (Not Found) and 410 (Gone) both mean the resource is not there.
    // Any other status, or an unparseable status line, counts as existing.
    return $status !== 404 && $status !== 410;
}
} }