Merge pull request #16096 from uberbrady/detect_csv_encodings_v2

This commit is contained in:
snipe 2025-01-20 14:42:07 +00:00 committed by GitHub
commit 446c7fb483
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 131 additions and 4 deletions

View file

@ -9,12 +9,14 @@ use App\Http\Transformers\ImportsTransformer;
use App\Models\Asset;
use App\Models\Company;
use App\Models\Import;
use Illuminate\Http\UploadedFile;
use Illuminate\Support\Facades\Artisan;
use Illuminate\Database\Eloquent\JsonEncodingException;
use Illuminate\Support\Facades\Request;
use Illuminate\Support\Facades\Session;
use Illuminate\Support\Facades\Storage;
use League\Csv\Reader;
use Onnov\DetectEncoding\EncodingDetector;
use Symfony\Component\HttpFoundation\File\Exception\FileException;
use Illuminate\Support\Facades\Log;
use Illuminate\Http\JsonResponse;
@ -45,6 +47,8 @@ class ImportController extends Controller
$path = config('app.private_uploads').'/imports';
$results = [];
$import = new Import;
$detector = new EncodingDetector();
foreach ($files as $file) {
if (! in_array($file->getMimeType(), [
'application/vnd.ms-excel',
@ -55,7 +59,6 @@ class ImportController extends Controller
'text/comma-separated-values',
'text/tsv', ])) {
$results['error'] = 'File type must be CSV. Uploaded file is '.$file->getMimeType();
return response()->json(Helper::formatStandardApiResponse('error', null, $results['error']), 422);
}
@ -63,7 +66,25 @@ class ImportController extends Controller
if (! ini_get('auto_detect_line_endings')) {
ini_set('auto_detect_line_endings', '1');
}
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
if ($tmpresults !== false) {
$transliterated = null; //save on memory?
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
}
}
}
}
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
$file_contents = null; //try to save on memory, I guess?
try {
$import->header_row = $reader->fetchOne(0);

View file

@ -20,6 +20,7 @@
"php": "^8.1",
"ext-curl": "*",
"ext-fileinfo": "*",
"ext-iconv": "*",
"ext-json": "*",
"ext-mbstring": "*",
"ext-pdo": "*",
@ -55,6 +56,7 @@
"nunomaduro/collision": "^7.0",
"okvpn/clock-lts": "^1.0",
"onelogin/php-saml": "^3.4",
"onnov/detect-encoding": "^2.0",
"osa-eg/laravel-teams-notification": "^2.1",
"paragonie/constant_time_encoding": "^2.3",
"paragonie/sodium_compat": "^1.19",

67
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "0750e3a427347b2a56a05a8b9b533d48",
"content-hash": "2a6e7f5e039ee2f40605aefc5c5baf08",
"packages": [
{
"name": "alek13/slack",
@ -5574,6 +5574,70 @@
],
"time": "2024-05-30T15:14:26+00:00"
},
{
"name": "onnov/detect-encoding",
"version": "v2.0.0",
"source": {
"type": "git",
"url": "https://github.com/onnov/detect-encoding.git",
"reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/onnov/detect-encoding/zipball/6a8159ac3e6178ae043244b9d66a9b2701121e07",
"reference": "6a8159ac3e6178ae043244b9d66a9b2701121e07",
"shasum": ""
},
"require": {
"ext-iconv": "*",
"php": ">=7.3"
},
"require-dev": {
"infection/infection": "*",
"phpbench/phpbench": "*",
"phpcompatibility/php-compatibility": "*",
"phpmd/phpmd": "*",
"phpstan/phpstan": "*",
"phpstan/phpstan-strict-rules": "*",
"phpunit/phpunit": "*",
"roave/backward-compatibility-check": "*",
"squizlabs/php_codesniffer": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"Onnov\\DetectEncoding\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "onnov",
"email": "oblnn@yandex.ru"
}
],
"description": "Text encoding definition class instead of mb_detect_encoding. Defines: utf-8, windows-1251, koi8-r, iso-8859-5, ibm866, .....",
"homepage": "https://github.com/onnov/detect-encoding",
"keywords": [
"cyrillic",
"encoding",
"ibm866",
"iconv",
"iso-8859-5",
"koi8-r",
"mb_detect_encoding",
"utf-8",
"windows-1251"
],
"support": {
"issues": "https://github.com/onnov/detect-encoding/issues",
"source": "https://github.com/onnov/detect-encoding/tree/v2.0.0"
},
"time": "2021-01-04T14:29:34+00:00"
},
{
"name": "osa-eg/laravel-teams-notification",
"version": "v2.1.2",
@ -16570,6 +16634,7 @@
"php": "^8.1",
"ext-curl": "*",
"ext-fileinfo": "*",
"ext-iconv": "*",
"ext-json": "*",
"ext-mbstring": "*",
"ext-pdo": "*"

View file

@ -3,6 +3,8 @@
namespace Tests\Feature\Importing\Ui;
use App\Models\User;
use Illuminate\Http\UploadedFile;
use PHPUnit\Framework\Attributes\Test;
use Tests\TestCase;
class ImportTest extends TestCase
@ -13,4 +15,35 @@ class ImportTest extends TestCase
->get(route('imports.index'))
->assertOk();
}
/**
 * Uploading a CSV that is NOT UTF-8 encoded must still yield UTF-8 data.
 *
 * The fixture is built as UTF-8, deliberately re-encoded to Windows-1251,
 * and posted to the import API. The first cell of the first data row in
 * the JSON response is expected to come back as the original UTF-8 text.
 */
public function testStoreInternationalAsset(): void
{
    // Cyrillic sample whose Windows-1251 bytes are not valid UTF-8.
    $cyrillic = 'це';
    $utf8Csv = "a,b,c\n$cyrillic,$cyrillic,$cyrillic\n";

    // Re-encode the UTF-8 fixture as Windows-1251 so the upload exercises
    // the character-set detection path rather than the plain UTF-8 path.
    $windows1251Csv = iconv('UTF-8', 'WINDOWS-1251', $utf8Csv);
    // iconv() signals failure by returning false; stop here instead of
    // uploading a broken fixture and failing later with a confusing message.
    $this->assertNotFalse($windows1251Csv, 'Could not convert the CSV fixture to WINDOWS-1251');

    $this->actingAsForApi(User::factory()->superuser()->create());

    $upload = UploadedFile::fake()->createWithContent("myname.csv", $windows1251Csv);

    $response = $this->post(route('api.imports.store'), ['files' => [$upload]])
        ->assertOk()
        ->assertJsonStructure([
            "files" => [
                [
                    "created_at",
                    "field_map",
                    "file_path",
                    "filesize",
                    "first_row",
                    "header_row",
                    "id",
                    "import_type",
                    "name",
                ],
            ],
        ]);

    // Strict comparison: the value must be exactly the original UTF-8 string.
    $this->assertSame($cyrillic, $response->json()['files'][0]['first_row'][0]);
}
}

View file

@ -206,7 +206,7 @@ abstract class FileBuilder
*
* @return string The filename.
*/
public function saveToImportsDirectory(?string $filename = null): string
public function saveToImportsDirectory(?string $filename = null, ?string $locale = null): string
{
$filename ??= Str::random(40) . '.csv';
@ -214,9 +214,15 @@ abstract class FileBuilder
$stream = fopen(config('app.private_uploads') . "/imports/{$filename}", 'w');
foreach ($this->toCsv() as $row) {
if ($locale) {
$newrow = [];
foreach ($row as $index => $cell) {
$newrow[$index] = iconv('utf-8', $locale, (string) $cell);
}
$row = $newrow;
}
fputcsv($stream, $row);
}
return $filename;
} finally {
if (is_resource($stream)) {