forked from projects/thebadspace
ro
7abd6d44a0
The script for collecting blocklist info was getting a little heavy, so it's been split in two to make it more accessible and easier to maintain. The first part collects data from current sources and stores it for the second part of the script, which compiles all the data and updates the database. The Source model was expanded to track votes so block and silence counts can be easily verified on the backend.
185 lines
7.2 KiB
PHP
185 lines
7.2 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Models\Location;
|
|
use App\Repositories\LocationRepository;
|
|
use App\Models\Source;
|
|
use Ramsey\Uuid\Uuid;
|
|
use Carbon\Carbon;
|
|
|
|
/**
 * Refreshes blocklist data in two independent steps:
 *
 *  - data(): pulls the current blocklist from every active Source and stores
 *    the raw list on the Source row (list_data / last_updated).
 *  - list(): reads the stored lists back, tallies block/silence votes per
 *    domain and creates or updates the matching Location rows.
 */
class UpdateService
{
    // Not referenced by any method of this class; kept so the class shape is unchanged.
    private $limit = 15;
    // Not referenced by any method of this class; kept for subclasses that may rely on it.
    protected $model;
    protected $locationRepository;

    /**
     * @param LocationRepository $locationRepository used to look up existing locations by url
     */
    public function __construct(LocationRepository $locationRepository)
    {
        $this->locationRepository = $locationRepository;
    }

    /**
     * Fetch the blocklist of every active source and store it on the source.
     *
     * A source whose url cannot be reached, or whose list cannot be read, is
     * counted as "not checked" and is left untouched, so its previously stored
     * list_data and last_updated survive the failed attempt.
     *
     * @return string summary: "<n> SOURCES UPDATED - <m> SOURCES NOT CHECKED"
     */
    public function data()
    {
        $sources = Source::where("active", true)->get();
        $missing = [];
        $checked = [];

        foreach ($sources as $source) {
            // Only talk to sources whose url is valid and answers.
            $result = $this->urlExists('https://' . $source->url)
                ? $this->fetchBlocklist($source)
                : null;

            if ($result === null) {
                // Do not save: writing here would overwrite the stored list
                // with data that does not belong to this source.
                $missing[] = ['source' => $source->url];
                continue;
            }

            $checked[] = ['source' => $source->url];
            $source->list_data = json_encode($result);
            $source->last_updated = Carbon::now();
            $source->save();
        }

        return count($checked) . ' SOURCES UPDATED - ' . count($missing) . ' SOURCES NOT CHECKED';
    }

    /**
     * Compile the stored source lists into per-domain vote totals and sync
     * them to the locations table.
     *
     * Existing locations get their counts, votes and active flag refreshed;
     * unknown domains are created. A location is active only when it has at
     * least two actions (blocks + silences) across all sources.
     *
     * @return string summary: "<n> LOCATIONS UPDATED | <m> NEW LOCATIONS CREATED"
     */
    public function list()
    {
        $duplicates = 0;
        $fresh = 0;

        $sources = Source::where("active", true)->get();
        $unified = $this->tallyVotes($sources);

        foreach ($unified as $item) {
            $actions = $item['block_count'] + $item['silence_count'];
            $active = $actions >= 2;

            $location = $this->locationRepository->getLocation($item['url']);
            if ($location) {
                // Update the counts of an existing location.
                ++$duplicates;

                $location->block_count = $item['block_count'];
                $location->block_vote = $item['block_vote'];
                $location->silence_count = $item['silence_count'];
                $location->silence_vote = $item['silence_vote'];
                $location->actions_count = $actions;
                $location->active = $active;

                // Replace null with an empty array (loose comparison kept on purpose).
                if ($location->images == null) {
                    $location->images = [];
                }
                $location->save();
            } else {
                // Make a new entry for an instance that is not present yet.
                ++$fresh;

                $comment = $item['comment'];
                $hasComment = $comment !== null && $comment !== '';

                Location::create([
                    'uuid' => Uuid::uuid4(),
                    'name' => $item['url'],
                    'url' => $item['url'],
                    'description' => $hasComment ? $comment : "no description",
                    'active' => $active,
                    // "defederate" is stored under the "suspend" rating.
                    'rating' => ($item['rating'] === 'defederate') ? 'suspend' : $item['rating'],
                    'added_by' => 1,
                    'tags' => 'poor moderation, hate speech',
                    'images' => json_encode([]),
                    'block_count' => $item['block_count'],
                    'block_vote' => $item['block_vote'],
                    'silence_count' => $item['silence_count'],
                    'silence_vote' => $item['silence_vote'],
                    // Same attribute name the update branch writes to.
                    'actions_count' => $actions,
                ]);
            }
        }

        //TODO: Send update post to TBS social account
        return $duplicates . ' LOCATIONS UPDATED | ' . $fresh . ' NEW LOCATIONS CREATED';
    }

    /**
     * Check that a url is a well-formed http(s) url and that requesting it
     * does not end in a 404.
     *
     * @param string $url absolute url including the scheme
     * @return bool false when the url is invalid, not http/https, unreachable,
     *              or its final response status is 404; true otherwise
     */
    public function urlExists($url)
    {
        // Remove all illegal characters from the url.
        $url = filter_var($url, FILTER_SANITIZE_URL);

        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }

        // Accept http/https schemes only.
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        if (!in_array($scheme, ["http", "https"], true)) {
            return false;
        }

        // Best-effort probe: a failed request only means "does not exist",
        // so the warning get_headers() raises on failure is suppressed.
        $headers = @get_headers($url);
        if (!$headers) {
            return false;
        }

        // Redirects contribute one status line each; the last one is the
        // final answer. Matching the code instead of the whole line makes the
        // check independent of protocol version and reason phrase.
        $status = null;
        foreach ($headers as $line) {
            if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})#i', $line, $match)) {
                $status = $match[1];
            }
        }

        return $status !== '404';
    }

    /**
     * Read the blocklist a source publishes.
     *
     * @param mixed $source a Source row; reads url, type, format and token
     * @return mixed|null the list to store ([] for source types this class does
     *                    not know how to read), or null when it could not be read
     */
    private function fetchBlocklist($source)
    {
        if ($source->type === 'mastodon') {
            $client = \Mastodon::domain('https://' . $source->url);
            // A token is only sent when the source has one.
            if ($source->token !== null && $source->token !== '') {
                $client = $client->token($source->token);
            }

            return $client->get('/instance/domain_blocks');
        }

        if ($source->type === 'custom' && $source->format === 'csv') {
            return $this->fetchCsvBlocklist('https://' . $source->url);
        }

        return [];
    }

    /**
     * Download a CSV deny list with the columns domain, severity, comment.
     *
     * @param string $url location of the CSV file
     * @return array|null one ['domain', 'severity', 'comment'] entry per usable
     *                    row, or null when the file could not be read
     */
    private function fetchCsvBlocklist($url)
    {
        // Skip blank lines so they do not turn into rows without a domain.
        $lines = file($url, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            return null;
        }

        $entries = [];
        foreach (array_map('str_getcsv', $lines) as $row) {
            $domain = $row[0] ?? null;
            if ($domain === null || $domain === '') {
                continue;
            }
            $entries[] = [
                'domain' => $domain,
                // Short rows simply have no severity and/or comment.
                'severity' => $row[1] ?? null,
                'comment' => $row[2] ?? null,
            ];
        }

        return $entries;
    }

    /**
     * Merge the stored lists of all sources into one entry per domain.
     *
     * Severities "suspend" and "defederate" count as a block vote, every other
     * severity as a silence vote. Rating and comment are taken from the first
     * source that lists the domain.
     *
     * @param iterable $sources rows exposing url and list_data (JSON text)
     * @return array entries keyed by domain, in first-seen order, each with
     *               name, url, rating, comment, block_count, silence_count,
     *               block_vote and silence_vote
     */
    private function tallyVotes($sources)
    {
        $unified = [];

        foreach ($sources as $source) {
            $entries = json_decode((string) $source->list_data);
            if (!is_array($entries)) {
                // Nothing stored yet, or the stored payload is not a list.
                continue;
            }

            foreach ($entries as $item) {
                if (!is_object($item) || !isset($item->domain) || !is_string($item->domain)) {
                    continue;
                }

                $domain = $item->domain;
                $severity = $item->severity ?? null;

                // Keying by domain makes the lookup exact for every entry,
                // including the very first one that was added.
                if (!isset($unified[$domain])) {
                    $unified[$domain] = [
                        'name' => $domain,
                        'url' => $domain,
                        'rating' => $severity,
                        'comment' => $item->comment ?? null,
                        'block_count' => 0,
                        'silence_count' => 0,
                        'block_vote' => [],
                        'silence_vote' => [],
                    ];
                }

                if ($severity === "suspend" || $severity === "defederate") {
                    ++$unified[$domain]['block_count'];
                    $unified[$domain]['block_vote'][] = $source->url;
                } else {
                    ++$unified[$domain]['silence_count'];
                    $unified[$domain]['silence_vote'][] = $source->url;
                }
            }
        }

        return $unified;
    }
}
|