diff options
| author | Louise Crow <louise.crow@gmail.com> | 2015-05-28 10:57:19 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2015-05-28 10:57:19 +0100 | 
| commit | e936b687a7df62dc69d1cc0493492346bd8e300a (patch) | |
| tree | 61206adc9f66b7bfc691174c9f93c92ccea1ffe9 | |
| parent | c6e8e298fd0861cd822f23f370a01ccbc21cb9d3 (diff) | |
| parent | 495f82365ccd374d5b33339abce356f3a3662295 (diff) | |
Merge branch 'force-filenames-to-utf8' into rails-3-develop
| -rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 7 | ||||
| -rw-r--r-- | lib/normalize_string.rb | 14 | ||||
| -rw-r--r-- | spec/fixtures/files/non-utf8-filename.email | 52 | ||||
| -rw-r--r-- | spec/lib/basic_encoding_spec.rb | 57 | ||||
| -rw-r--r-- | spec/lib/mail_handler/backends/mail_backend_spec.rb | 9 | 
5 files changed, 137 insertions, 2 deletions
| diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 5a7e0ef65..9e3fbc008 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -65,7 +65,12 @@ module MailHandler              # Return a copy of the file name for the mail part              def get_part_file_name(part)                  part_file_name = part.filename -                part_file_name.nil? ? nil : part_file_name.dup +                part_file_name = part_file_name.nil? ? nil : part_file_name.dup +                if part_file_name +                    part_file_name = CGI.unescape(part_file_name) +                    part_file_name = convert_string_to_utf8(part_file_name, part.charset) +                end +                part_file_name              end              # Get the body of a mail part diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb index 409262b8e..d850d7e05 100644 --- a/lib/normalize_string.rb +++ b/lib/normalize_string.rb @@ -73,6 +73,20 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)      result  end +def convert_string_to_utf8(s, suggested_character_encoding=nil) +    begin +        result = normalize_string_to_utf8 s, suggested_character_encoding +    rescue EncodingNormalizationError +        result = s +        if String.method_defined?(:encode) +            result = s.force_encoding("utf-8").encode("utf-8", :invalid => :replace, +                                                               :undef => :replace, +                                                               :replace => "") +        end +    end +    result +end +  def log_text_details(message, text)      if String.method_defined?(:encode)          STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" diff --git a/spec/fixtures/files/non-utf8-filename.email b/spec/fixtures/files/non-utf8-filename.email new file mode 100644 index 000000000..ed1f1a9f5 --- /dev/null +++ b/spec/fixtures/files/non-utf8-filename.email @@ -0,0 +1,52 @@ +From authority@example.org  Tue Dec  3 11:13:02 2013 +Return-path: <authority@example.org> +Envelope-to: requester@example.org +Delivery-date: Tue, 03 Dec 2013 11:13:00 +0000 +From: Test Authority <authority@example.org> +To: requester@example.org +Subject: testing a PDF attachment with the wrong content-type +Date: Tue, 03 Dec 2013 11:12:45 +0000 +Message-ID: <87li09xuasdfasdfpoija@blahblah> +Content-Type: multipart/mixed; +	boundary="_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" +MIME-Version: 1.0 +X-GlobalCerts-Milter: WDC-SECUREMAIL02.wokingham.gov.uk 13Feb2014-16:41:39.109 +X-Scanned-By: MailControl 26514.0 (www.mailcontrol.com) on 10.70.0.132 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: multipart/alternative; +	boundary="_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some text + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some html + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_-- + + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; +	name="RV %A312000 or more.xls" +Content-Description: RV %A312000 or more.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:36:59 GMT"; filename="RV %A312000 or more.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="332288" +Content-Transfer-Encoding: base64 + +some base 64as;dm mklasd + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; name="other.xls" +Content-Description: other.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:37:02 GMT"; filename="other.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="33280" +Content-Transfer-Encoding: base64 + +some base 64 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_--
\ No newline at end of file diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb index 1b3d9cd1c..d77465ad8 100644 --- a/spec/lib/basic_encoding_spec.rb +++ b/spec/lib/basic_encoding_spec.rb @@ -103,7 +103,7 @@ end  describe "convert_string_to_utf8_or_binary" do -    describe "when passed uniterpretable character data" do +    describe "when passed uninterpretable character data" do          it "should return it as a binary string" do @@ -155,3 +155,58 @@ describe "convert_string_to_utf8_or_binary" do      end  end + +describe "convert_string_to_utf8" do + +    describe "when passed uninterpretable character data" do + +        it "should return it as a utf8 string" do + +            converted = convert_string_to_utf8 random_string +            converted.should == random_string + +            if String.method_defined?(:encode) +                converted.encoding.to_s.should == 'UTF-8' +            end + +            converted = convert_string_to_utf8 random_string,'UTF-8' +            converted.should == random_string + +            if String.method_defined?(:encode) +                converted.encoding.to_s.should == 'UTF-8' +            end + +        end +    end + +    describe "when passed unlabelled Windows 1252 data" do + +        it "should correctly convert it to UTF-8" do + +            converted = convert_string_to_utf8 windows_1252_string + +            converted.should ==  "DASH – DASH" + +            if String.method_defined?(:encode) +                converted.encoding.to_s.should == 'UTF-8' +            end +        end + +    end + +    describe "when passed GB 18030 data" do + +        it "should correctly convert it to UTF-8 if unlabelled" do + +            converted = convert_string_to_utf8 gb_18030_spam_string + +            converted.should start_with("贵公司负责人") + +            if String.method_defined?(:encode) +                converted.encoding.to_s.should == 'UTF-8' +            end +        end + +    end + +end
\ No newline at end of file diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index dfd6dd1fe..044fbef4f 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -37,6 +37,15 @@ describe MailHandler::Backends::MailBackend do              get_part_file_name(part).should be_nil          end +        it 'turns an invalid UTF-8 name into a valid one' do +            mail = get_fixture_mail('non-utf8-filename.email') +            part = mail.attachments.first +            filename = get_part_file_name(part) +            if filename.respond_to?(:valid_encoding) +               filename.valid_encoding?.should == true +            end +        end +      end      describe :get_part_body do | 
