diff --git a/app/Http/Controllers/Api/ImportController.php b/app/Http/Controllers/Api/ImportController.php index ebf8b550b2..2a4d91c3d9 100644 --- a/app/Http/Controllers/Api/ImportController.php +++ b/app/Http/Controllers/Api/ImportController.php @@ -9,12 +9,14 @@ use App\Http\Transformers\ImportsTransformer; use App\Models\Asset; use App\Models\Company; use App\Models\Import; +use Illuminate\Http\UploadedFile; use Illuminate\Support\Facades\Artisan; use Illuminate\Database\Eloquent\JsonEncodingException; use Illuminate\Support\Facades\Request; use Illuminate\Support\Facades\Session; use Illuminate\Support\Facades\Storage; use League\Csv\Reader; +use Onnov\DetectEncoding\EncodingDetector; use Symfony\Component\HttpFoundation\File\Exception\FileException; use Illuminate\Support\Facades\Log; use Illuminate\Http\JsonResponse; @@ -45,6 +47,8 @@ class ImportController extends Controller $path = config('app.private_uploads').'/imports'; $results = []; $import = new Import; + $detector = new EncodingDetector(); + foreach ($files as $file) { if (! in_array($file->getMimeType(), [ 'application/vnd.ms-excel', @@ -55,7 +59,6 @@ class ImportController extends Controller 'text/comma-separated-values', 'text/tsv', ])) { $results['error'] = 'File type must be CSV. Uploaded file is '.$file->getMimeType(); - return response()->json(Helper::formatStandardApiResponse('error', null, $results['error']), 422); } @@ -63,7 +66,25 @@ class ImportController extends Controller if (! ini_get('auto_detect_line_endings')) { ini_set('auto_detect_line_endings', '1'); } + $file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
+ $encoding = $detector->getEncoding($file_contents); + $reader = null; + if (strcasecmp($encoding, 'UTF-8') !== 0) { + $transliterated = iconv($encoding, 'UTF-8', $file_contents); //NOTE(review): iconv() raises a notice on illegal input - confirm the app's error handler does not turn that into an exception before the false-check below can run + if ($transliterated !== false) { + $tmpname = tempnam(sys_get_temp_dir(), ''); + $tmpresults = $tmpname === false ? false : file_put_contents($tmpname, $transliterated); //tempnam() returns false on failure; skip the write so we fall back to the original upload instead of erroring + if ($tmpresults !== false) { + $transliterated = null; //save on memory? + $newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded' + if ($newfile->isValid()) { + $file = $newfile; + } + } + } + } $reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak? + $file_contents = null; //try to save on memory, I guess? try { $import->header_row = $reader->fetchOne(0); diff --git a/composer.json b/composer.json index 865878280c..3040dc4243 100644 --- a/composer.json +++ b/composer.json @@ -20,6 +20,7 @@ "php": "^8.1", "ext-curl": "*", "ext-fileinfo": "*", + "ext-iconv": "*", "ext-json": "*", "ext-mbstring": "*", "ext-pdo": "*", @@ -55,6 +56,7 @@ "nunomaduro/collision": "^7.0", "okvpn/clock-lts": "^1.0", "onelogin/php-saml": "^3.4", + "onnov/detect-encoding": "^2.0", "osa-eg/laravel-teams-notification": "^2.1", "paragonie/constant_time_encoding": "^2.3", "paragonie/sodium_compat": "^1.19", diff --git a/composer.lock b/composer.lock index 17fe70c40a..b8e67655d1 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "0750e3a427347b2a56a05a8b9b533d48", + "content-hash": "2a6e7f5e039ee2f40605aefc5c5baf08", "packages": [ { "name": "alek13/slack", @@ -5574,6 +5574,70 @@ ], "time": "2024-05-30T15:14:26+00:00" }, + { + "name": "onnov/detect-encoding", + "version": "v2.0.0", + "source": { + "type": "git", + "url":
"https://github.com/onnov/detect-encoding.git", + "reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/onnov/detect-encoding/zipball/6a8159ac3e6178ae043244b9d66a9b2701121e07", + "reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07", + "shasum": "" + }, + "require": { + "ext-iconv": "*", + "php": ">=7.3" + }, + "require-dev": { + "infection/infection": "*", + "phpbench/phpbench": "*", + "phpcompatibility/php-compatibility": "*", + "phpmd/phpmd": "*", + "phpstan/phpstan": "*", + "phpstan/phpstan-strict-rules": "*", + "phpunit/phpunit": "*", + "roave/backward-compatibility-check": "*", + "squizlabs/php_codesniffer": "*" + }, + "type": "library", + "autoload": { + "psr-4": { + "Onnov\\DetectEncoding\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "onnov", + "email": "oblnn@yandex.ru" + } + ], + "description": "Text encoding definition class instead of mb_detect_encoding. 
Defines: utf-8, windows-1251, koi8-r, iso-8859-5, ibm866, .....", + "homepage": "https://github.com/onnov/detect-encoding", + "keywords": [ + "cyrillic", + "encoding", + "ibm866", + "iconv", + "iso-8859-5", + "koi8-r", + "mb_detect_encoding", + "utf-8", + "windows-1251" + ], + "support": { + "issues": "https://github.com/onnov/detect-encoding/issues", + "source": "https://github.com/onnov/detect-encoding/tree/v2.0.0" + }, + "time": "2021-01-04T14:29:34+00:00" + }, { "name": "osa-eg/laravel-teams-notification", "version": "v2.1.2", @@ -16570,6 +16634,7 @@ "php": "^8.1", "ext-curl": "*", "ext-fileinfo": "*", + "ext-iconv": "*", "ext-json": "*", "ext-mbstring": "*", "ext-pdo": "*" diff --git a/tests/Feature/Importing/Ui/ImportTest.php b/tests/Feature/Importing/Ui/ImportTest.php index 3493f47aff..4b811c4872 100644 --- a/tests/Feature/Importing/Ui/ImportTest.php +++ b/tests/Feature/Importing/Ui/ImportTest.php @@ -3,6 +3,8 @@ namespace Tests\Feature\Importing\Ui; use App\Models\User; +use Illuminate\Http\UploadedFile; +use PHPUnit\Framework\Attributes\Test; use Tests\TestCase; class ImportTest extends TestCase @@ -13,4 +15,37 @@ class ImportTest extends TestCase ->get(route('imports.index')) ->assertOk(); } + + /** + * A CSV uploaded in Windows-1251 must come back from the import API with its first data row readable as the original UTF-8 text. + */ + public function testStoreInternationalAsset(): void + { + $evil_string = 'це'; //cyrillic - windows-1251 (ONE) + $csv_contents = "a,b,c\n$evil_string,$evil_string,$evil_string\n"; + + // now, deliberately transform our UTF-8 into Windows-1251 so we can test out the character-set detection + $transliterated_contents = iconv('UTF-8', 'WINDOWS-1251', $csv_contents); + $this->assertNotFalse($transliterated_contents, 'iconv() could not convert the fixture to WINDOWS-1251');
+ $this->actingAsForApi(User::factory()->superuser()->create()); + $results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $transliterated_contents)]]) + ->assertOk() + ->assertJsonStructure([ + "files" => [ + [ + "created_at", + "field_map", + "file_path", + "filesize", + "first_row", + "header_row", + "id", + "import_type", + "name", + ] + ] + ]); + $this->assertSame($evil_string, $results->json()['files'][0]['first_row'][0]); + } } diff --git a/tests/Support/Importing/FileBuilder.php b/tests/Support/Importing/FileBuilder.php index fad40054b4..bf08dc96d1 100644 --- a/tests/Support/Importing/FileBuilder.php +++ b/tests/Support/Importing/FileBuilder.php @@ -206,7 +206,7 @@ abstract class FileBuilder * * @return string The filename. */ - public function saveToImportsDirectory(?string $filename = null): string + public function saveToImportsDirectory(?string $filename = null, ?string $locale = null): string { $filename ??= Str::random(40) . '.csv'; @@ -214,9 +214,15 @@ abstract class FileBuilder $stream = fopen(config('app.private_uploads') . "/imports/{$filename}", 'w'); foreach ($this->toCsv() as $row) { + if ($locale) { + $newrow = []; + foreach ($row as $index => $cell) { + $newrow[$index] = iconv('utf-8', $locale, (string) $cell); + } + $row = $newrow; + } fputcsv($stream, $row); } - return $filename; } finally { if (is_resource($stream)) {