Data collection script overhaul

The script for collecting blocklist info was getting a little heavy, so
it's been split in two to make it more accessible and easier to
maintain. The first part collects data from current sources and stores
it for the second part of the script, which compiles all the data and
updates the database.

The Source model was expanded to track votes so block and silence counts
can be easily verified on the backend.
This commit is contained in:
ro 2024-02-17 19:33:35 -06:00
parent 1382976549
commit 7abd6d44a0
5 changed files with 98 additions and 105 deletions

View file

@ -15,14 +15,21 @@ class LocationController extends Controller
public function updateLocations()
{
$result = $this->update->locations();
$result = $this->update->data();
return back()->with(
'message',
$result['duplicates'] .
' UPDATED - ' . $result['fresh'] .
' CREATED - ' . count($result['missing']) .
' SOURCE(S) NOT CHECKED'
$result
);
}
public function compileLocations()
{
$result = $this->update->list();
return back()->with(
'message',
$result
);
}
}

View file

@ -11,5 +11,5 @@ class Source extends Model
protected $table = "source";
public $timestamps = false;
protected $fillable = ["url", "type", "active", "format", "token"];
protected $fillable = ["url", "type", "active", "format", "token", "list_data", "last_updated"];
}

View file

@ -6,6 +6,7 @@ use App\Models\Location;
use App\Repositories\LocationRepository;
use App\Models\Source;
use Ramsey\Uuid\Uuid;
use Carbon\Carbon;
class UpdateService
{
@ -18,43 +19,16 @@ class UpdateService
$this->locationRepository = $locationRepository;
}
public function locations()
public function data()
{
$duplicates = 0;
$fresh = 0;
$missing = [];
$unified = [];
$cleanSources = [];
$sources = Source::where("active", true)->get();
$missing = [];
$checked = [];
//checks source url to make sure they valid
foreach ($sources as $source) {
if ($source->type == 'mastodon') {
$url = 'https://' . $source->url;
} else {
$url = $source->url;
}
if ($this->urlExists($url)) {
array_push($cleanSources, [
'url' => $source->url,
'token' => $source->token,
'type' => $source->type,
'format' => $source->format]);
} else {
var_dump($url);
array_push($missing, ['source' => $url]);
}
}
//valid source url get compiled for unified
foreach ($cleanSources as $source) {
//check url to make sure it's cool
//parsing for mastodon
if ($source['type'] == 'mastodon') {
if ($this->urlExists('https://' . $source->url)) {
$result = [];
if ($source['type'] == 'mastodon') {
if ($source['token'] == null) {
$result = \Mastodon::domain('https://' . $source['url'])
->get('/instance/domain_blocks');
@ -63,69 +37,72 @@ class UpdateService
->token($source['token'])
->get('/instance/domain_blocks');
}
foreach ($result as $item) {
$index = array_search($item['domain'], array_column($unified, 'url'));
if ($index) {
//if there is a match, update the count
if ($item['severity'] == "suspend" || $item['severity'] == "defederate") {
++$unified[$index]['block_count'];
} else {
++$unified[$index]['silence_count'];
}
} else {
$silence = 0;
$suspend = 0;
if ($item['severity'] == "suspend" || $item['severity'] == "defederate") {
++$suspend;
} else {
++$silence;
}
array_push($unified, [
'name' => $item['domain'],
'url' => $item['domain'],
'rating' => $item['severity'],
'comment' => $item['comment'],
'block_count' => $suspend,
'silence_count' => $silence,
]);
}
}
}
//parsing for custom csv
if ($source['type'] == 'custom' && $source['format'] == 'csv') {
$denylist = array_map('str_getcsv', file($source['url']));
} elseif ($source['type'] == 'custom' && $source['format'] == 'csv') {
$denylist = array_map('str_getcsv', file('https://' . $source['url']));
foreach ($denylist as $item) {
$index = array_search($item[0], array_column($unified, 'url'));
array_push($result, [
'domain' => $item[0],
'severity' => $item[1],
'comment' => $item[2]]);
}
}
array_push($checked, ['source' => $source->url]);
} else {
array_push($missing, ['source' => $source->url]);
};
$source->list_data = json_encode($result);
$source->last_updated = Carbon::now();
$source->save();
}
return count($checked) . ' SOURCES UPDATED - ' . count($missing) . ' SOURCES NOT CHECKED';
}
public function list()
{
$duplicates = 0;
$fresh = 0;
$unified = [];
$sources = Source::where("active", true)->get();
foreach ($sources as $source) {
//$listData = json_decode();
foreach (json_decode($source->list_data) as $item) {
$index = array_search($item->domain, array_column($unified, 'url'));
if ($index) {
//if there is a match, update the count
if ($item[1] == "suspend" || $item['severity'] == "defederate") {
if ($item->severity == "suspend" || $item->severity == "defederate") {
++$unified[$index]['block_count'];
array_push($unified[$index]['block_vote'], $source->url);
} else {
++$unified[$index]['silence_count'];
array_push($unified[$index]['silence_vote'], $source->url);
}
} else {
$silence = 0;
$suspend = 0;
if ($item[1] == "suspend" || $item[1] == "defederate") {
$block_vote = [];
$silence_vote = [];
if ($item->severity == "suspend" || $item->severity == "defederate") {
++$suspend;
array_push($block_vote, $source->url);
} else {
++$silence;
array_push($silence_vote, $source->url);
}
array_push($unified, [
'name' => $item[0],
'url' => $item[0],
'rating' => $item[1],
'comment' => $item[2],
'name' => $item->domain,
'url' => $item->domain,
'rating' => $item->severity,
'comment' => $item->comment,
'block_count' => $suspend,
'silence_count' => $silence,
'block_vote' => $block_vote,
'silence_vote' => $silence_vote,
]);
}
}
}
}
//once the unified list is created, update current entries or create fresh ones
foreach ($unified as $item) {
$location = $this->locationRepository->getLocation($item['url']);
@ -134,12 +111,18 @@ class UpdateService
//update block count for existing item
$location->block_count = $item['block_count'];
$location->block_vote = [];
$location->block_vote = $item['block_vote'];
$location->silence_count = $item['silence_count'];
$location->silence_vote = [];
$location->silence_vote = $item['silence_vote'];
$location->actions_count = $item['block_count'] + $item['silence_count'];
if (($item['block_count'] + $item['silence_count']) < 2) {
$location->active = false;
} else {
$location->active = true;
}
//replace null with empty array
@ -169,14 +152,15 @@ class UpdateService
'tags' => 'poor moderation, hate speech',
'images' => json_encode($images),
'block_count' => $item['block_count'],
'block_vote' => $item['block_vote'],
'silence_count' => $item['silence_count'],
'silence_vote' => $item['silence_vote'],
'actions_cont' => $item['block_count'] + $item['silence_count']
]);
}
}
//TODO: Send update post to TBS social account
return ['duplicates' => $duplicates, 'fresh' => $fresh, 'missing' => $missing];
return $duplicates . ' LOCATIONS UPDATED | ' . $fresh . ' NEW LOCATIONS CREATED';
}
public function urlExists($url)

View file

@ -6,7 +6,8 @@
<section>
<article>
<h2>Member Listing </h2>
<a href="/den/admin/update">UPDATE LOCATIONS</a>
<a href="/den/admin/update">UPDATE LOCATIONS</a><br />
<a href="/den/admin/compile">COMPILE LOCATIONS</a>
</article>
</section>
@endsection

View file

@ -45,4 +45,5 @@ Route::group(['prefix' => 'den', 'middleware' => 'member.check'], function () {
Route::post("/locations/add", [LocationController::class, 'addLocation']);
//admin actions
Route::get("/admin/update", [LocationController::class, 'updateLocations']);
Route::get("/admin/compile", [LocationController::class, 'compileLocations']);
});