Merge pull request #16376 from uberbrady/improve_safety_csv_charset_detection

Add some safeties around the charset-detection and transliteration
This commit is contained in:
snipe 2025-02-27 16:26:44 +00:00 committed by GitHub
commit 3928c8afe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 50 additions and 13 deletions

View file

@ -66,25 +66,41 @@ class ImportController extends Controller
if (! ini_get('auto_detect_line_endings')) {
ini_set('auto_detect_line_endings', '1');
}
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
if ($tmpresults !== false) {
if (function_exists('iconv')) {
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
\Log::warning("Discovered encoding: $encoding in uploaded CSV");
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = false;
try {
$transliterated = iconv(strtoupper($encoding), 'UTF-8', $file_contents);
} catch (\Exception $e) {
$transliterated = false; //blank out the partially-decoded string
return response()->json(
Helper::formatStandardApiResponse(
'error',
null,
trans('admin/hardware/message.import.transliterate_failure', ["encoding" => $encoding])
),
422
);
}
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
$transliterated = null; //save on memory?
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
if ($tmpresults !== false) {
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
}
}
}
}
$file_contents = null; //try to save on memory, I guess?
}
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
$file_contents = null; //try to save on memory, I guess?
try {
$import->header_row = $reader->fetchOne(0);

View file

@ -66,6 +66,7 @@ return [
'file_already_deleted' => 'The file selected was already deleted',
'header_row_has_malformed_characters' => 'One or more attributes in the header row contain malformed UTF-8 characters',
'content_row_has_malformed_characters' => 'One or more attributes in the first row of content contain malformed UTF-8 characters',
'transliterate_failure' => 'Transliteration from :encoding to UTF-8 failed due to invalid characters in input'
],

View file

@ -45,4 +45,24 @@ class ImportTest extends TestCase
]);
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
}
public function testStoreInternationalAssetMisparse(): void
{
$evil_maker = function ($arr) {
$results = '';
foreach ($arr as $thing) {
$results .= chr($thing);
}
return $results;
};
// 0xC0 makes it 'not unicode', and 0xFF makes it 'likely WINDOWS-1251', and 0x98 at the end makes it 'not-valid-Windows-1251'
$evil_content = $evil_maker([0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x98]);
$this->actingAsForApi(User::factory()->superuser()->create());
$results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $evil_content)]])
->assertStatus(422)
->assertStatusMessageIs('error')
->assertMessagesAre(trans('admin/hardware/message.import.transliterate_failure', ["encoding" => "windows-1251"]));
}
}