From 7abd6d44a0deaa5988099ab5a30d1a8127ccaf44 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 17 Feb 2024 19:33:35 -0600 Subject: [PATCH] Data collection script overhaul The script for collecting blocklist info was getting a little heavy, so it's been split in two to make it more accessible and easier to maintain. The first part collects data from current sources and stores it for the second part of the script, which compiles all the data and updates the database. The Source model was expanded to track votes so block and silence counts can be easily verified on the backend. --- app/Http/Controllers/LocationController.php | 17 +- app/Models/Source.php | 2 +- app/Services/UpdateService.php | 180 +++++++++----------- resources/views/back/member.blade.php | 3 +- routes/web.php | 1 + 5 files changed, 98 insertions(+), 105 deletions(-) diff --git a/app/Http/Controllers/LocationController.php b/app/Http/Controllers/LocationController.php index a2fc4cc..c6ada97 100644 --- a/app/Http/Controllers/LocationController.php +++ b/app/Http/Controllers/LocationController.php @@ -15,14 +15,21 @@ class LocationController extends Controller public function updateLocations() { - $result = $this->update->locations(); + $result = $this->update->data(); return back()->with( 'message', - $result['duplicates'] . - ' UPDATED - ' . $result['fresh'] . - ' CREATED - ' . count($result['missing']) . 
- ' SOURCE(S) NOT CHECKED' + $result + ); + } + + public function compileLocations() + { + $result = $this->update->list(); + + return back()->with( + 'message', + $result ); } } diff --git a/app/Models/Source.php b/app/Models/Source.php index 7e455b6..e66fad9 100644 --- a/app/Models/Source.php +++ b/app/Models/Source.php @@ -11,5 +11,5 @@ class Source extends Model protected $table = "source"; public $timestamps = false; - protected $fillable = ["url", "type", "active", "format", "token"]; + protected $fillable = ["url", "type", "active", "format", "token", "list_data", "last_updated"]; } diff --git a/app/Services/UpdateService.php b/app/Services/UpdateService.php index 5f93e7c..3b8ce34 100644 --- a/app/Services/UpdateService.php +++ b/app/Services/UpdateService.php @@ -6,6 +6,7 @@ use App\Models\Location; use App\Repositories\LocationRepository; use App\Models\Source; use Ramsey\Uuid\Uuid; +use Carbon\Carbon; class UpdateService { @@ -18,115 +19,91 @@ class UpdateService $this->locationRepository = $locationRepository; } - public function locations() + public function data() + { + $sources = Source::where("active", true)->get(); + $missing = []; + $checked = []; + //checks source url to make sure they valid + foreach ($sources as $source) { + if ($this->urlExists('https://' . $source->url)) { + $result = []; + if ($source['type'] == 'mastodon') { + if ($source['token'] == null) { + $result = \Mastodon::domain('https://' . $source['url']) + ->get('/instance/domain_blocks'); + } else { + $result = \Mastodon::domain('https://' . $source['url']) + ->token($source['token']) + ->get('/instance/domain_blocks'); + } + } elseif ($source['type'] == 'custom' && $source['format'] == 'csv') { + $denylist = array_map('str_getcsv', file('https://' . 
$source['url'])); + foreach ($denylist as $item) { + array_push($result, [ + 'domain' => $item[0], + 'severity' => $item[1], + 'comment' => $item[2]]); + } + } + array_push($checked, ['source' => $source->url]); + } else { + array_push($missing, ['source' => $source->url]); + }; + $source->list_data = json_encode($result); + $source->last_updated = Carbon::now(); + $source->save(); + } + return count($checked) . ' SOURCES UPDATED - ' . count($missing) . ' SOURCES NOT CHECKED'; + } + + public function list() { $duplicates = 0; $fresh = 0; - $missing = []; + $unified = []; - $unified = []; - $cleanSources = []; - $sources = Source::where("active", true)->get(); + $sources = Source::where("active", true)->get(); - //checks source url to make sure they valid foreach ($sources as $source) { - if ($source->type == 'mastodon') { - $url = 'https://' . $source->url; - } else { - $url = $source->url; - } - - if ($this->urlExists($url)) { - array_push($cleanSources, [ - 'url' => $source->url, - 'token' => $source->token, - 'type' => $source->type, - 'format' => $source->format]); - } else { - var_dump($url); - array_push($missing, ['source' => $url]); - } - } - - //valid source url get compiled for unified - foreach ($cleanSources as $source) { - //check url to make sure it's cool - - //parsing for mastodon - if ($source['type'] == 'mastodon') { - $result = []; - if ($source['token'] == null) { - $result = \Mastodon::domain('https://' . 
$source['url']) - ->get('/instance/domain_blocks'); + //$listData = json_decode(); + foreach (json_decode($source->list_data) as $item) { + $index = array_search($item->domain, array_column($unified, 'url')); + if ($index) { + //if there is a match, update the count + if ($item->severity == "suspend" || $item->severity == "defederate") { + ++$unified[$index]['block_count']; + array_push($unified[$index]['block_vote'], $source->url); + } else { + ++$unified[$index]['silence_count']; + array_push($unified[$index]['silence_vote'], $source->url); + } } else { - $result = \Mastodon::domain('https://' . $source['url']) - ->token($source['token']) - ->get('/instance/domain_blocks'); - } - - foreach ($result as $item) { - $index = array_search($item['domain'], array_column($unified, 'url')); - if ($index) { - //if there is a match, update the count - if ($item['severity'] == "suspend" || $item['severity'] == "defederate") { - ++$unified[$index]['block_count']; - } else { - ++$unified[$index]['silence_count']; - } + $silence = 0; + $suspend = 0; + $block_vote = []; + $silence_vote = []; + if ($item->severity == "suspend" || $item->severity == "defederate") { + ++$suspend; + array_push($block_vote, $source->url); } else { - $silence = 0; - $suspend = 0; - if ($item['severity'] == "suspend" || $item['severity'] == "defederate") { - ++$suspend; - } else { - ++$silence; - } - array_push($unified, [ - 'name' => $item['domain'], - 'url' => $item['domain'], - 'rating' => $item['severity'], - 'comment' => $item['comment'], - 'block_count' => $suspend, - 'silence_count' => $silence, - ]); - } - } - } - //parsing for custom csv - if ($source['type'] == 'custom' && $source['format'] == 'csv') { - $denylist = array_map('str_getcsv', file($source['url'])); - foreach ($denylist as $item) { - $index = array_search($item[0], array_column($unified, 'url')); - if ($index) { - //if there is a match, update the count - if ($item[1] == "suspend" || $item['severity'] == "defederate") { - 
++$unified[$index]['block_count']; - } else { - ++$unified[$index]['silence_count']; - } - } else { - $silence = 0; - $suspend = 0; - if ($item[1] == "suspend" || $item[1] == "defederate") { - ++$suspend; - } else { - ++$silence; - } - array_push($unified, [ - 'name' => $item[0], - 'url' => $item[0], - 'rating' => $item[1], - 'comment' => $item[2], - 'block_count' => $suspend, - 'silence_count' => $silence, - ]); + ++$silence; + array_push($silence_vote, $source->url); } + array_push($unified, [ + 'name' => $item->domain, + 'url' => $item->domain, + 'rating' => $item->severity, + 'comment' => $item->comment, + 'block_count' => $suspend, + 'silence_count' => $silence, + 'block_vote' => $block_vote, + 'silence_vote' => $silence_vote, + ]); } } } - //once the unified list is created, update current entries or create fresh ones - foreach ($unified as $item) { $location = $this->locationRepository->getLocation($item['url']); if ($location) { @@ -134,12 +111,18 @@ class UpdateService //update block count for existing item $location->block_count = $item['block_count']; + $location->block_vote = []; + $location->block_vote = $item['block_vote']; $location->silence_count = $item['silence_count']; + $location->silence_vote = []; + $location->silence_vote = $item['silence_vote']; $location->actions_count = $item['block_count'] + $item['silence_count']; if (($item['block_count'] + $item['silence_count']) < 2) { $location->active = false; + } else { + $location->active = true; } //replace null with empty array @@ -169,14 +152,15 @@ class UpdateService 'tags' => 'poor moderation, hate speech', 'images' => json_encode($images), 'block_count' => $item['block_count'], + 'block_vote' => $item['block_vote'], 'silence_count' => $item['silence_count'], + 'silence_vote' => $item['silence_vote'], 'actions_cont' => $item['block_count'] + $item['silence_count'] ]); } } //TODO: Send update post to TBS social account - - return ['duplicates' => $duplicates, 'fresh' => $fresh, 'missing' => 
$missing]; + return $duplicates . ' LOCATIONS UPDATED | ' . $fresh . ' NEW LOCATIONS CREATED'; } public function urlExists($url) diff --git a/resources/views/back/member.blade.php b/resources/views/back/member.blade.php index a458a17..62b033c 100644 --- a/resources/views/back/member.blade.php +++ b/resources/views/back/member.blade.php @@ -6,7 +6,8 @@
@endsection \ No newline at end of file diff --git a/routes/web.php b/routes/web.php index fbab175..ad95f26 100644 --- a/routes/web.php +++ b/routes/web.php @@ -45,4 +45,5 @@ Route::group(['prefix' => 'den', 'middleware' => 'member.check'], function () { Route::post("/locations/add", [LocationController::class, 'addLocation']); //admin actions Route::get("/admin/update", [LocationController::class, 'updateLocations']); + Route::get("/admin/compile", [LocationController::class, 'compileLocations']); });