diff options
| author | Louise Crow <louise.crow@gmail.com> | 2015-05-14 17:11:24 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2015-05-15 16:25:15 +0100 | 
| commit | f198ca6944cb5ad5b81efbe42233837b8b773fbb (patch) | |
| tree | cf2076727ab19219d052cd86dc1ab09af55d373e | |
| parent | 203eea18feeaec3dc9a3e8e8af3b83de085b53ac (diff) | |
Force attachment filenames to utf-8 before trying to save them
In a database with encoding SQL-ASCII, an invalid UTF-8 filename
can be saved but will cause an "invalid byte sequence in UTF-8"
error when the filename is prepared for display. In a database with a
UTF-8 encoding, saving the string will cause an error like
"ActiveRecord::StatementInvalid (PG::Error: ERROR: invalid byte
sequence for encoding "UTF8")".
| -rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 7 | ||||
| -rw-r--r-- | lib/normalize_string.rb | 9 | ||||
| -rw-r--r-- | spec/fixtures/files/non-utf8-filename.email | 52 | ||||
| -rw-r--r-- | spec/lib/mail_handler/backends/mail_backend_spec.rb | 9 | 
4 files changed, 73 insertions, 4 deletions
| diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index ae3077a72..9b647e2fa 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -64,7 +64,12 @@ module MailHandler              # Return a copy of the file name for the mail part              def get_part_file_name(part)                  part_file_name = part.filename -                part_file_name.nil? ? nil : part_file_name.dup +                part_file_name = part_file_name.nil? ? nil : part_file_name.dup +                if part_file_name +                    part_file_name = CGI.unescape(part_file_name) +                    part_file_name = convert_string_to_utf8(part_file_name, part.charset) +                end +                part_file_name              end              # Get the body of a mail part diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb index de847cd16..1205a1d9c 100644 --- a/lib/normalize_string.rb +++ b/lib/normalize_string.rb @@ -76,9 +76,12 @@ def convert_string_to_utf8(s, suggested_character_encoding=nil)      begin          result = normalize_string_to_utf8 s, suggested_character_encoding      rescue EncodingNormalizationError -        result = s.force_encoding("utf-8").encode("utf-8", :invalid => :replace, -                                   :undef => :replace, -                                   :replace => "") if String.method_defined?(:encode) +        result = s +        if String.method_defined?(:encode) +            result = s.force_encoding("utf-8").encode("utf-8", :invalid => :replace, +                                                               :undef => :replace, +                                                               :replace => "") +        end      end      result  end diff --git a/spec/fixtures/files/non-utf8-filename.email b/spec/fixtures/files/non-utf8-filename.email new file mode 100644 index 000000000..ed1f1a9f5 --- /dev/null +++ 
b/spec/fixtures/files/non-utf8-filename.email @@ -0,0 +1,52 @@ +From authority@example.org  Tue Dec  3 11:13:02 2013 +Return-path: <authority@example.org> +Envelope-to: requester@example.org +Delivery-date: Tue, 03 Dec 2013 11:13:00 +0000 +From: Test Authority <authority@example.org> +To: requester@example.org +Subject: testing a PDF attachment with the wrong content-type +Date: Tue, 03 Dec 2013 11:12:45 +0000 +Message-ID: <87li09xuasdfasdfpoija@blahblah> +Content-Type: multipart/mixed; +	boundary="_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" +MIME-Version: 1.0 +X-GlobalCerts-Milter: WDC-SECUREMAIL02.wokingham.gov.uk 13Feb2014-16:41:39.109 +X-Scanned-By: MailControl 26514.0 (www.mailcontrol.com) on 10.70.0.132 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: multipart/alternative; +	boundary="_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some text + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some html + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_-- + + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; +	name="RV %A312000 or more.xls" +Content-Description: RV %A312000 or more.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:36:59 GMT"; filename="RV %A312000 or more.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="332288" +Content-Transfer-Encoding: base64 + +some base 64as;dm mklasd + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; name="other.xls" +Content-Description: other.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:37:02 
GMT"; filename="other.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="33280" +Content-Transfer-Encoding: base64 + +some base 64 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_--
\ No newline at end of file diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index 588033faf..895160670 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -37,6 +37,15 @@ describe MailHandler::Backends::MailBackend do              get_part_file_name(part).should be_nil          end +        it 'turns an invalid UTF-8 name into a valid one' do +            mail = get_fixture_mail('non-utf8-filename.email') +            part = mail.attachments.first +            filename = get_part_file_name(part) +            if filename.respond_to?(:valid_encoding) +               filename.valid_encoding?.should == true +            end +        end +      end      describe :get_part_body do | 
