Detect the character encoding of an uploaded import CSV and transcode it to
UTF-8 before it reaches the CSV reader. Adds onnov/detect-encoding and
ext-iconv as dependencies, plus a feature test that imports a WINDOWS-1251 file.

diff --git a/app/Importer/Importer.php b/app/Importer/Importer.php
index 907c8b72c5..48cdc7266c 100644
--- a/app/Importer/Importer.php
+++ b/app/Importer/Importer.php
@@ -13,6 +13,7 @@ use Illuminate\Support\Facades\Auth;
 use Illuminate\Support\Facades\DB;
 use League\Csv\Reader;
 use Illuminate\Support\Facades\Log;
+use Onnov\DetectEncoding\EncodingDetector;
 
 abstract class Importer
 {
@@ -124,11 +125,37 @@ abstract class Importer
         if (! ini_get('auto_detect_line_endings')) {
             ini_set('auto_detect_line_endings', '1');
         }
+        $detector = new EncodingDetector();
+
         // By default the importer passes a url to the file.
         // However, for testing we also support passing a string directly
         if (is_file($file)) {
+            // TODO: this reads the whole file into memory just to sniff its encoding, which is costly for big imports.
+            $file_contents = file_get_contents($file);
+        } else {
+            $file_contents = $file;
+        }
+        $encoding = $detector->getEncoding($file_contents);
+        Log::debug("DETECTED ENCODING IS: $encoding");
+        unset($file_contents); // drop the sniffing copy before the CSV reader is built
+        $needs_conversion = $encoding && strcasecmp($encoding, 'UTF-8') !== 0;
+        if (is_file($file)) {
+            if ($needs_conversion) {
+                // Transcode while reading by wrapping the path in an iconv stream filter.
+                $file = "php://filter/convert.iconv.$encoding.utf-8/resource=".$file;
+            }
             $this->csv = Reader::createFromPath($file);
         } else {
+            // The CSV was passed in as a string, so transcode it in memory.
+            if ($needs_conversion) {
+                $converted = iconv($encoding, 'UTF-8', $file);
+                if ($converted === false) {
+                    // Keep the original bytes rather than handing `false` to the CSV reader.
+                    Log::warning("Could not convert import data from $encoding to UTF-8; importing it unconverted.");
+                } else {
+                    $file = $converted;
+                }
+            }
             $this->csv = Reader::createFromString($file);
         }
         $this->tempPassword = substr(str_shuffle('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'), 0, 40);
diff --git a/composer.json b/composer.json
index 865878280c..3040dc4243 100644
--- a/composer.json
+++ b/composer.json
@@ -20,6 +20,7 @@
         "php": "^8.1",
         "ext-curl": "*",
         "ext-fileinfo": "*",
+        "ext-iconv": "*",
         "ext-json": "*",
         "ext-mbstring": "*",
         "ext-pdo": "*",
@@ -55,6 +56,7 @@
         "nunomaduro/collision": "^7.0",
         "okvpn/clock-lts": "^1.0",
         "onelogin/php-saml": "^3.4",
+        "onnov/detect-encoding": "^2.0",
         "osa-eg/laravel-teams-notification": "^2.1",
         "paragonie/constant_time_encoding": "^2.3",
         "paragonie/sodium_compat": "^1.19",
diff --git a/composer.lock b/composer.lock
index 17fe70c40a..b8e67655d1 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "0750e3a427347b2a56a05a8b9b533d48",
+    "content-hash": "2a6e7f5e039ee2f40605aefc5c5baf08",
     "packages": [
         {
             "name": "alek13/slack",
@@ -5574,6 +5574,70 @@
             ],
             "time": "2024-05-30T15:14:26+00:00"
         },
+        {
+            "name": "onnov/detect-encoding",
+            "version": "v2.0.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/onnov/detect-encoding.git",
+                "reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/onnov/detect-encoding/zipball/6a8159ac3e6178ae043244b9d66a9b2701121e07",
+                "reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07",
+                "shasum": ""
+            },
+            "require": {
+                "ext-iconv": "*",
+                "php": ">=7.3"
+            },
+            "require-dev": {
+                "infection/infection": "*",
+                "phpbench/phpbench": "*",
+                "phpcompatibility/php-compatibility": "*",
+                "phpmd/phpmd": "*",
+                "phpstan/phpstan": "*",
+                "phpstan/phpstan-strict-rules": "*",
+                "phpunit/phpunit": "*",
+                "roave/backward-compatibility-check": "*",
+                "squizlabs/php_codesniffer": "*"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "Onnov\\DetectEncoding\\": "src/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "onnov",
+                    "email": "oblnn@yandex.ru"
+                }
+            ],
+            "description": "Text encoding definition class instead of mb_detect_encoding. Defines: utf-8, windows-1251, koi8-r, iso-8859-5, ibm866, .....",
+            "homepage": "https://github.com/onnov/detect-encoding",
+            "keywords": [
+                "cyrillic",
+                "encoding",
+                "ibm866",
+                "iconv",
+                "iso-8859-5",
+                "koi8-r",
+                "mb_detect_encoding",
+                "utf-8",
+                "windows-1251"
+            ],
+            "support": {
+                "issues": "https://github.com/onnov/detect-encoding/issues",
+                "source": "https://github.com/onnov/detect-encoding/tree/v2.0.0"
+            },
+            "time": "2021-01-04T14:29:34+00:00"
+        },
         {
             "name": "osa-eg/laravel-teams-notification",
             "version": "v2.1.2",
@@ -16570,6 +16634,7 @@
         "php": "^8.1",
         "ext-curl": "*",
         "ext-fileinfo": "*",
+        "ext-iconv": "*",
         "ext-json": "*",
         "ext-mbstring": "*",
         "ext-pdo": "*"
diff --git a/tests/Feature/Importing/Api/ImportAssetsTest.php b/tests/Feature/Importing/Api/ImportAssetsTest.php
index 0f54b22e92..fdea4ca47a 100644
--- a/tests/Feature/Importing/Api/ImportAssetsTest.php
+++ b/tests/Feature/Importing/Api/ImportAssetsTest.php
@@ -141,6 +141,32 @@ class ImportAssetsTest extends ImportDataTestCase implements TestsPermissionsReq
 
     }
 
+    #[Test]
+    public function importInternationalAsset(): void
+    {
+        // Cyrillic text that is representable in WINDOWS-1251, the single-byte encoding the file is written in.
+        $cyrillicName = 'це';
+        $importFileBuilder = ImportFileBuilder::new(['itemName' => $cyrillicName]); // the builder key is 'itemName', not 'name'
+        $row = $importFileBuilder->firstRow();
+        $import = Import::factory()->asset()->create(['file_path' => $importFileBuilder->saveToImportsDirectory(null, 'WINDOWS-1251')]);
+
+        $this->actingAsForApi(User::factory()->superuser()->create());
+        $this->importFileResponse(['import' => $import->id])
+            ->assertOk()
+            ->assertExactJson([
+                'payload' => null,
+                'status' => 'success',
+                'messages' => ['redirect_url' => route('hardware.index')]
+            ]);
+
+        $newAsset = Asset::query()
+            ->where('serial', $row['serialNumber'])
+            ->sole();
+
+        // The stored name must equal the original UTF-8 string, i.e. the file was transcoded on import.
+        $this->assertEquals($cyrillicName, $newAsset->name);
+    }
+
     #[Test]
     public function willIgnoreUnknownColumnsWhenFileContainsUnknownColumns(): void
     {
diff --git a/tests/Support/Importing/FileBuilder.php b/tests/Support/Importing/FileBuilder.php
index fad40054b4..bf08dc96d1 100644
--- a/tests/Support/Importing/FileBuilder.php
+++ b/tests/Support/Importing/FileBuilder.php
@@ -206,7 +206,7 @@ abstract class FileBuilder
      *
      * @return string The filename.
      */
-    public function saveToImportsDirectory(?string $filename = null): string
+    public function saveToImportsDirectory(?string $filename = null, ?string $locale = null): string
     {
         $filename ??= Str::random(40) . '.csv';
 
@@ -214,9 +214,19 @@ abstract class FileBuilder
             $stream = fopen(config('app.private_uploads') . "/imports/{$filename}", 'w');
 
             foreach ($this->toCsv() as $row) {
+                if ($locale) {
+                    // Despite the parameter name, $locale is an iconv character-set name such as 'WINDOWS-1251'.
+                    foreach ($row as $index => $cell) {
+                        $converted = iconv('utf-8', $locale, (string) $cell);
+                        if ($converted === false) {
+                            // Fail loudly instead of silently writing an empty cell into the fixture.
+                            throw new \RuntimeException("Unable to convert import cell to {$locale}.");
+                        }
+                        $row[$index] = $converted;
+                    }
+                }
                 fputcsv($stream, $row);
             }
-
             return $filename;
         } finally {
             if (is_resource($stream)) {