forked from projects/thebadspace
Data collection script overhaul
The script for collecting blocklist info was getting a little heavy, so it's been split in two to make it more accessible and easier to maintain. The first part collects data from current sources and stores it for the second part of the script, which compiles all the data and updates the database. The Source model was expanded to track votes so block and silence counts can be easily verified on the backend.
This commit is contained in:
parent
1382976549
commit
7abd6d44a0
5 changed files with 98 additions and 105 deletions
|
@ -15,14 +15,21 @@ class LocationController extends Controller
|
||||||
|
|
||||||
public function updateLocations()
|
public function updateLocations()
|
||||||
{
|
{
|
||||||
$result = $this->update->locations();
|
$result = $this->update->data();
|
||||||
|
|
||||||
return back()->with(
|
return back()->with(
|
||||||
'message',
|
'message',
|
||||||
$result['duplicates'] .
|
$result
|
||||||
' UPDATED - ' . $result['fresh'] .
|
);
|
||||||
' CREATED - ' . count($result['missing']) .
|
}
|
||||||
' SOURCE(S) NOT CHECKED'
|
|
||||||
|
public function compileLocations()
|
||||||
|
{
|
||||||
|
$result = $this->update->list();
|
||||||
|
|
||||||
|
return back()->with(
|
||||||
|
'message',
|
||||||
|
$result
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,5 +11,5 @@ class Source extends Model
|
||||||
|
|
||||||
protected $table = "source";
|
protected $table = "source";
|
||||||
public $timestamps = false;
|
public $timestamps = false;
|
||||||
protected $fillable = ["url", "type", "active", "format", "token"];
|
protected $fillable = ["url", "type", "active", "format", "token", "list_data", "last_updated"];
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ use App\Models\Location;
|
||||||
use App\Repositories\LocationRepository;
|
use App\Repositories\LocationRepository;
|
||||||
use App\Models\Source;
|
use App\Models\Source;
|
||||||
use Ramsey\Uuid\Uuid;
|
use Ramsey\Uuid\Uuid;
|
||||||
|
use Carbon\Carbon;
|
||||||
|
|
||||||
class UpdateService
|
class UpdateService
|
||||||
{
|
{
|
||||||
|
@ -18,115 +19,91 @@ class UpdateService
|
||||||
$this->locationRepository = $locationRepository;
|
$this->locationRepository = $locationRepository;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function locations()
|
/**
 * Fetch the current blocklist from every active source and cache it on the
 * source row (`list_data` as JSON, `last_updated` as the fetch time).
 *
 * A source that cannot be reached, or whose CSV cannot be read, is reported
 * as "not checked" and its previously stored `list_data` / `last_updated`
 * are left untouched, so a temporary outage never overwrites good data with
 * another source's list.
 *
 * @return string Summary in the form "N SOURCES UPDATED - M SOURCES NOT CHECKED".
 */
public function data()
{
    $sources = Source::where("active", true)->get();
    $missing = [];
    $checked = [];

    foreach ($sources as $source) {
        $address = 'https://' . $source->url;

        // Skip unreachable sources entirely: nothing is fetched, nothing is saved.
        if (!$this->urlExists($address)) {
            array_push($missing, ['source' => $source->url]);
            continue;
        }

        // Always start from an empty list so data never leaks between sources.
        $result = [];

        if ($source->type == 'mastodon') {
            $client = \Mastodon::domain($address);
            // Loose comparison kept on purpose: an empty token counts as "no token".
            if ($source->token != null) {
                $client = $client->token($source->token);
            }
            $result = $client->get('/instance/domain_blocks');
        } elseif ($source->type == 'custom' && $source->format == 'csv') {
            $lines = file($address, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

            // file() returns false when the document cannot be read.
            if ($lines === false) {
                array_push($missing, ['source' => $source->url]);
                continue;
            }

            foreach (array_map('str_getcsv', $lines) as $item) {
                $domain = $item[0] ?? null;

                // Ignore rows that do not name a domain (blank or malformed lines).
                if ($domain === null || $domain === '') {
                    continue;
                }

                array_push($result, [
                    'domain' => $domain,
                    'severity' => $item[1] ?? null,
                    'comment' => $item[2] ?? null,
                ]);
            }
        }

        array_push($checked, ['source' => $source->url]);

        $source->list_data = json_encode($result);
        $source->last_updated = Carbon::now();
        $source->save();
    }

    return count($checked) . ' SOURCES UPDATED - ' . count($missing) . ' SOURCES NOT CHECKED';
}
|
||||||
|
|
||||||
|
public function list()
|
||||||
{
|
{
|
||||||
$duplicates = 0;
|
$duplicates = 0;
|
||||||
$fresh = 0;
|
$fresh = 0;
|
||||||
$missing = [];
|
$unified = [];
|
||||||
|
|
||||||
$unified = [];
|
$sources = Source::where("active", true)->get();
|
||||||
$cleanSources = [];
|
|
||||||
$sources = Source::where("active", true)->get();
|
|
||||||
|
|
||||||
//checks source url to make sure they valid
|
|
||||||
foreach ($sources as $source) {
|
foreach ($sources as $source) {
|
||||||
if ($source->type == 'mastodon') {
|
//$listData = json_decode();
|
||||||
$url = 'https://' . $source->url;
|
foreach (json_decode($source->list_data) as $item) {
|
||||||
} else {
|
$index = array_search($item->domain, array_column($unified, 'url'));
|
||||||
$url = $source->url;
|
if ($index) {
|
||||||
}
|
//if there is a match, update the count
|
||||||
|
if ($item->severity == "suspend" || $item->severity == "defederate") {
|
||||||
if ($this->urlExists($url)) {
|
++$unified[$index]['block_count'];
|
||||||
array_push($cleanSources, [
|
array_push($unified[$index]['block_vote'], $source->url);
|
||||||
'url' => $source->url,
|
} else {
|
||||||
'token' => $source->token,
|
++$unified[$index]['silence_count'];
|
||||||
'type' => $source->type,
|
array_push($unified[$index]['silence_vote'], $source->url);
|
||||||
'format' => $source->format]);
|
}
|
||||||
} else {
|
|
||||||
var_dump($url);
|
|
||||||
array_push($missing, ['source' => $url]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//valid source url get compiled for unified
|
|
||||||
foreach ($cleanSources as $source) {
|
|
||||||
//check url to make sure it's cool
|
|
||||||
|
|
||||||
//parsing for mastodon
|
|
||||||
if ($source['type'] == 'mastodon') {
|
|
||||||
$result = [];
|
|
||||||
if ($source['token'] == null) {
|
|
||||||
$result = \Mastodon::domain('https://' . $source['url'])
|
|
||||||
->get('/instance/domain_blocks');
|
|
||||||
} else {
|
} else {
|
||||||
$result = \Mastodon::domain('https://' . $source['url'])
|
$silence = 0;
|
||||||
->token($source['token'])
|
$suspend = 0;
|
||||||
->get('/instance/domain_blocks');
|
$block_vote = [];
|
||||||
}
|
$silence_vote = [];
|
||||||
|
if ($item->severity == "suspend" || $item->severity == "defederate") {
|
||||||
foreach ($result as $item) {
|
++$suspend;
|
||||||
$index = array_search($item['domain'], array_column($unified, 'url'));
|
array_push($block_vote, $source->url);
|
||||||
if ($index) {
|
|
||||||
//if there is a match, update the count
|
|
||||||
if ($item['severity'] == "suspend" || $item['severity'] == "defederate") {
|
|
||||||
++$unified[$index]['block_count'];
|
|
||||||
} else {
|
|
||||||
++$unified[$index]['silence_count'];
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
$silence = 0;
|
++$silence;
|
||||||
$suspend = 0;
|
array_push($silence_vote, $source->url);
|
||||||
if ($item['severity'] == "suspend" || $item['severity'] == "defederate") {
|
|
||||||
++$suspend;
|
|
||||||
} else {
|
|
||||||
++$silence;
|
|
||||||
}
|
|
||||||
array_push($unified, [
|
|
||||||
'name' => $item['domain'],
|
|
||||||
'url' => $item['domain'],
|
|
||||||
'rating' => $item['severity'],
|
|
||||||
'comment' => $item['comment'],
|
|
||||||
'block_count' => $suspend,
|
|
||||||
'silence_count' => $silence,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//parsing for custom csv
|
|
||||||
if ($source['type'] == 'custom' && $source['format'] == 'csv') {
|
|
||||||
$denylist = array_map('str_getcsv', file($source['url']));
|
|
||||||
foreach ($denylist as $item) {
|
|
||||||
$index = array_search($item[0], array_column($unified, 'url'));
|
|
||||||
if ($index) {
|
|
||||||
//if there is a match, update the count
|
|
||||||
if ($item[1] == "suspend" || $item['severity'] == "defederate") {
|
|
||||||
++$unified[$index]['block_count'];
|
|
||||||
} else {
|
|
||||||
++$unified[$index]['silence_count'];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
$silence = 0;
|
|
||||||
$suspend = 0;
|
|
||||||
if ($item[1] == "suspend" || $item[1] == "defederate") {
|
|
||||||
++$suspend;
|
|
||||||
} else {
|
|
||||||
++$silence;
|
|
||||||
}
|
|
||||||
array_push($unified, [
|
|
||||||
'name' => $item[0],
|
|
||||||
'url' => $item[0],
|
|
||||||
'rating' => $item[1],
|
|
||||||
'comment' => $item[2],
|
|
||||||
'block_count' => $suspend,
|
|
||||||
'silence_count' => $silence,
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
|
array_push($unified, [
|
||||||
|
'name' => $item->domain,
|
||||||
|
'url' => $item->domain,
|
||||||
|
'rating' => $item->severity,
|
||||||
|
'comment' => $item->comment,
|
||||||
|
'block_count' => $suspend,
|
||||||
|
'silence_count' => $silence,
|
||||||
|
'block_vote' => $block_vote,
|
||||||
|
'silence_vote' => $silence_vote,
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//once the unified list is created, update current entries or create fresh ones
|
|
||||||
|
|
||||||
foreach ($unified as $item) {
|
foreach ($unified as $item) {
|
||||||
$location = $this->locationRepository->getLocation($item['url']);
|
$location = $this->locationRepository->getLocation($item['url']);
|
||||||
if ($location) {
|
if ($location) {
|
||||||
|
@ -134,12 +111,18 @@ class UpdateService
|
||||||
//update block count for existing item
|
//update block count for existing item
|
||||||
|
|
||||||
$location->block_count = $item['block_count'];
|
$location->block_count = $item['block_count'];
|
||||||
|
$location->block_vote = [];
|
||||||
|
$location->block_vote = $item['block_vote'];
|
||||||
$location->silence_count = $item['silence_count'];
|
$location->silence_count = $item['silence_count'];
|
||||||
|
$location->silence_vote = [];
|
||||||
|
$location->silence_vote = $item['silence_vote'];
|
||||||
|
|
||||||
$location->actions_count = $item['block_count'] + $item['silence_count'];
|
$location->actions_count = $item['block_count'] + $item['silence_count'];
|
||||||
|
|
||||||
if (($item['block_count'] + $item['silence_count']) < 2) {
|
if (($item['block_count'] + $item['silence_count']) < 2) {
|
||||||
$location->active = false;
|
$location->active = false;
|
||||||
|
} else {
|
||||||
|
$location->active = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
//replace null with empty array
|
//replace null with empty array
|
||||||
|
@ -169,14 +152,15 @@ class UpdateService
|
||||||
'tags' => 'poor moderation, hate speech',
|
'tags' => 'poor moderation, hate speech',
|
||||||
'images' => json_encode($images),
|
'images' => json_encode($images),
|
||||||
'block_count' => $item['block_count'],
|
'block_count' => $item['block_count'],
|
||||||
|
'block_vote' => $item['block_vote'],
|
||||||
'silence_count' => $item['silence_count'],
|
'silence_count' => $item['silence_count'],
|
||||||
|
'silence_vote' => $item['silence_vote'],
|
||||||
'actions_cont' => $item['block_count'] + $item['silence_count']
|
'actions_cont' => $item['block_count'] + $item['silence_count']
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//TODO: Send update post to TBS social account
|
//TODO: Send update post to TBS social account
|
||||||
|
return $duplicates . ' LOCATIONS UPDATED | ' . $fresh . ' NEW LOCATIONS CREATED';
|
||||||
return ['duplicates' => $duplicates, 'fresh' => $fresh, 'missing' => $missing];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function urlExists($url)
|
public function urlExists($url)
|
||||||
|
|
|
@ -6,7 +6,8 @@
|
||||||
<section>
|
<section>
|
||||||
<article>
|
<article>
|
||||||
<h2>Member Listing </h2>
|
<h2>Member Listing </h2>
|
||||||
<a href="/den/admin/update">UPDATE LOCATIONS</a>
|
<a href="/den/admin/update">UPDATE LOCATIONS</a><br />
|
||||||
|
<a href="/den/admin/compile">COMPILE LOCATIONS</a>
|
||||||
</article>
|
</article>
|
||||||
</section>
|
</section>
|
||||||
@endsection
|
@endsection
|
|
@ -45,4 +45,5 @@ Route::group(['prefix' => 'den', 'middleware' => 'member.check'], function () {
|
||||||
Route::post("/locations/add", [LocationController::class, 'addLocation']);
|
Route::post("/locations/add", [LocationController::class, 'addLocation']);
|
||||||
//admin actions
|
//admin actions
|
||||||
Route::get("/admin/update", [LocationController::class, 'updateLocations']);
|
Route::get("/admin/update", [LocationController::class, 'updateLocations']);
|
||||||
|
Route::get("/admin/compile", [LocationController::class, 'compileLocations']);
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in a new issue