Merge pull request #16376 from uberbrady/improve_safety_csv_charset_detection

Add some safeties around the charset-detection and transliteration
This commit is contained in:
snipe 2025-02-27 16:26:44 +00:00 committed by GitHub
commit 3928c8afe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 50 additions and 13 deletions

View file

@ -66,16 +66,31 @@ class ImportController extends Controller
if (! ini_get('auto_detect_line_endings')) { if (! ini_get('auto_detect_line_endings')) {
ini_set('auto_detect_line_endings', '1'); ini_set('auto_detect_line_endings', '1');
} }
if (function_exists('iconv')) {
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it? $file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents); $encoding = $detector->getEncoding($file_contents);
\Log::warning("Discovered encoding: $encoding in uploaded CSV");
$reader = null; $reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) { if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = iconv($encoding, 'UTF-8', $file_contents); $transliterated = false;
try {
$transliterated = iconv(strtoupper($encoding), 'UTF-8', $file_contents);
} catch (\Exception $e) {
$transliterated = false; //blank out the partially-decoded string
return response()->json(
Helper::formatStandardApiResponse(
'error',
null,
trans('admin/hardware/message.import.transliterate_failure', ["encoding" => $encoding])
),
422
);
}
if ($transliterated !== false) { if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), ''); $tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated); $tmpresults = file_put_contents($tmpname, $transliterated);
if ($tmpresults !== false) {
$transliterated = null; //save on memory? $transliterated = null; //save on memory?
if ($tmpresults !== false) {
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded' $newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) { if ($newfile->isValid()) {
$file = $newfile; $file = $newfile;
@ -83,8 +98,9 @@ class ImportController extends Controller
} }
} }
} }
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
$file_contents = null; //try to save on memory, I guess? $file_contents = null; //try to save on memory, I guess?
}
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
try { try {
$import->header_row = $reader->fetchOne(0); $import->header_row = $reader->fetchOne(0);

View file

@ -66,6 +66,7 @@ return [
'file_already_deleted' => 'The file selected was already deleted', 'file_already_deleted' => 'The file selected was already deleted',
'header_row_has_malformed_characters' => 'One or more attributes in the header row contain malformed UTF-8 characters', 'header_row_has_malformed_characters' => 'One or more attributes in the header row contain malformed UTF-8 characters',
'content_row_has_malformed_characters' => 'One or more attributes in the first row of content contain malformed UTF-8 characters', 'content_row_has_malformed_characters' => 'One or more attributes in the first row of content contain malformed UTF-8 characters',
'transliterate_failure' => 'Transliteration from :encoding to UTF-8 failed due to invalid characters in input'
], ],

View file

@ -45,4 +45,24 @@ class ImportTest extends TestCase
]); ]);
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]); $this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
} }
/**
 * Posts a CSV whose raw bytes are crafted to be detected as windows-1251 yet
 * contain a byte that is not valid in that charset, and asserts that the
 * import endpoint answers with a 422 'error' response carrying the
 * transliterate_failure message.
 */
public function testStoreInternationalAssetMisparse(): void
{
    // 0xC0 makes it 'not unicode', the 0xFF run makes it 'likely WINDOWS-1251',
    // and 0x98 at the end makes it 'not-valid-Windows-1251'.
    $evil_bytes = [0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x98];

    // 'C*' packs every argument as one unsigned byte, giving the same string
    // as concatenating chr() of each value.
    $evil_content = pack('C*', ...$evil_bytes);

    $this->actingAsForApi(User::factory()->superuser()->create());

    $this->post(
        route('api.imports.store'),
        ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $evil_content)]]
    )
        ->assertStatus(422)
        ->assertStatusMessageIs('error')
        ->assertMessagesAre(trans('admin/hardware/message.import.transliterate_failure', ["encoding" => "windows-1251"]));
}
} }