diff options
| author | Louise Crow <louise.crow@gmail.com> | 2013-06-04 15:03:02 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2013-06-04 15:03:02 +0100 | 
| commit | a885764b65916020d9182073b38f6951a20d4b8c (patch) | |
| tree | 0988651c144b65a8e46b28b376b2e72a5947d934 /spec/lib | |
| parent | eb1c465162420ad62c16dccb983cb28aa89a4639 (diff) | |
| parent | a919141992a40599f99b32bd4a8312a0009f3f7a (diff) | |
Merge branch 'release/0.11'0.11.0.3
Diffstat (limited to 'spec/lib')
| -rw-r--r-- | spec/lib/basic_encoding_tests.rb | 157 | ||||
| -rw-r--r-- | spec/lib/mail_handler/mail_handler_spec.rb | 103 | ||||
| -rw-r--r-- | spec/lib/sendmail_return_path_spec.rb | 15 | ||||
| -rw-r--r-- | spec/lib/timezone_fixes_spec.rb | 5 | 
4 files changed, 274 insertions, 6 deletions
diff --git a/spec/lib/basic_encoding_tests.rb b/spec/lib/basic_encoding_tests.rb new file mode 100644 index 000000000..35d35fd4a --- /dev/null +++ b/spec/lib/basic_encoding_tests.rb @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') + +def bytes_to_binary_string( bytes, claimed_encoding = nil ) +    claimed_encoding ||= 'ASCII-8BIT' +    bytes_string = bytes.pack('c*') +    if RUBY_VERSION.to_f >= 1.9 +        bytes_string.force_encoding! claimed_encoding +    end +    bytes_string +end + +random_string = bytes_to_binary_string [ 0x0f, 0x58, 0x1c, 0x8f, 0xa4, 0xcf, +                                         0xf6, 0x8c, 0x9d, 0xa7, 0x06, 0xd9, +                                         0xf7, 0x90, 0x6c, 0x6f] + +windows_1252_string = bytes_to_binary_string [ 0x44, 0x41, 0x53, 0x48, 0x20, +                                               0x96, 0x20, 0x44, 0x41, 0x53, +                                               0x48 ] + +# It's a shame this example is so long, but if we don't take enough it +# gets misinterpreted as Shift_JIS + +gb_18030_bytes = [ 0xb9, 0xf3, 0xb9, 0xab, 0xcb, 0xbe, 0xb8, 0xba, 0xd4, 0xf0, +                   0xc8, 0xcb, 0x28, 0xbe, 0xad, 0xc0, 0xed, 0x2f, 0xb2, 0xc6, +                   0xce, 0xf1, 0x29, 0xc4, 0xfa, 0xba, 0xc3, 0xa3, 0xba, 0x0d, +                   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0xb1, 0xbe, 0xb9, 0xab, 0xcb, 0xbe, 0xd4, +                   0xda, 0x31, 0x39, 0x39, 0x37, 0xc4, 0xea, 0xb3, 0xc9, 0xc1, +                   0xa2, 0xb9, 0xfa, 0xbc, 0xd2, 0xb9, 0xa4, 0xc9, 0xcc, 0xd7, +                   0xa2, 0xb2, 0xe1, 0x2e, 0xca, 0xb5, 0xc1, 0xa6, 0xd0, 0xdb, +                   0xba, 0xf1, 0xa1, 0xa3, 0xd3, 0xd0, 0xb6, 0xc0, 0xc1, 0xa2, +                   0xcb, 0xb0, 0xce, 0xf1, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd7, 0xa8, 0xd2, 0xb5, +                   0xc8, 0xcb, 0xd4, 0xb1, 0x3b, 0xd4, 0xda, 0xc8, 0xab, 0xb9, +                   0xfa, 0xb8, 0xf7, 0xb3, 0xc7, 0xca, 0xd0, 0xc9, 0xe8, 0xc1, +                   0xa2, 0xb7, 0xd6, 0xb9, 0xab, 0xcb, 0xbe, 0xa3, 0xa8, 0xd5, +                   0xe3, 0xbd, 0xad, 0xa1, 0xa2, 0xc9, 0xcf, 0xba, 0xa3, 0xa1, +                   0xa2, 0xb9, 0xe3, 0xd6, 0xdd, 0xa1, 0xa2, 0xbd, 0xad, 0xcb, +                   0xd5, 0xb5, 0xc8, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xd8, 0xb7, 0xbd, 0xa3, +                   0xa9, 0xd2, 0xf2, 0xbd, 0xf8, 0xcf, 0xee, 0xbd, 0xcf, 0xb6, +                   0xe0, 0xcf, 0xd6, 0xcd, 0xea, 0xb3, 0xc9, 0xb2, 0xbb, 0xc1, +                   0xcb, 0xc3, 0xbf, 0xd4, 0xc2, 0xcf, 0xfa, 0xca, 0xdb, 0xb6, +                   0xee, 0xb6, 0xc8, 0xa1, 0xa3, 0xc3, 0xbf, 0xd4, 0xc2, 0xd3, +                   0xd0, 0xd2, 0xbb, 0xb2, 0xbf, 0xb7, 0xd6, 0x0d, 0x0a, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd4, +                   0xf6, 0xd6, 0xb5, 0xb6, 0x90, 0xa3, 0xa8, 0x36, 0x2d, 0x37, +                   0x25, 0xd7, 0xf3, 0xd3, 0xd2, 0x29, 0xba, 0xcd, 0xc6, 0xd5, +                   0xc6, 0xb1, 0xa3, 0xa8, 0x30, 0x2e, 0x35, 0x25, 0x2d, 0x32, +                   0x25, 0x20, 0xd7, 0xf3, 0xd3, 0xd2, 0xa3, 0xa9, 0xd3, 0xc5, +                   0xbb, 0xdd, 0xb4, 0xfa, 0xbf, 0xaa, 0xbb, 0xf2, 0xba, 0xcf, +                   0xd7, 0xf7, 0xa3, 0xac, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xe3, 0xca, 0xfd, +                   0xbd, 0xcf, 0xb5, 0xcd, 0xa1, 0xa3, 0xb4, 0xfa, 0xc0, 0xed, +                   0xb7, 0xb6, 0xce, 0xa7, 0xc8, 0xe7, 0xcf, 0xc2, 0xa3, 0xba, +                   0x0d, 0x0a ] + +gb_18030_spam_string = bytes_to_binary_string gb_18030_bytes + +describe "normalize_string_to_utf8" do + +    describe "when passed uniterpretable character data" do + +        it "should reject it as invalid" do + +            expect { +                normalize_string_to_utf8 random_string +            }.to raise_error(EncodingNormalizationError) + +            expect { +                normalize_string_to_utf8 random_string, 'UTF-8' +            }.to raise_error(EncodingNormalizationError) + +        end +    end + +    describe "when passed unlabelled Windows 1252 data" do + +        it "should correctly convert it to UTF-8" do + +            normalized = normalize_string_to_utf8 windows_1252_string + +            normalized.should ==  "DASH – DASH" + +        end + +    end + +    describe "when passed GB 18030 data" do + +        it "should correctly convert it to UTF-8 if unlabelled" do + +            normalized = normalize_string_to_utf8 gb_18030_spam_string + +            normalized.should start_with("贵公司负责人") + +        end + +    end + +end + +describe "convert_string_to_utf8_or_binary" do + +    describe "when passed uniterpretable character data" do + +        it "should return it as a binary string" do + +            converted = convert_string_to_utf8_or_binary random_string +            converted.should == random_string + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'ASCII-8BIT' +            end + +            converted = convert_string_to_utf8_or_binary random_string,'UTF-8' +            converted.should == random_string + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'ASCII-8BIT' +            end + +        end +    end + +    describe "when passed unlabelled Windows 1252 data" do + +        it "should correctly convert it to UTF-8" do + +            converted = convert_string_to_utf8_or_binary windows_1252_string + +            converted.should ==  "DASH – DASH" + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'UTF-8' +            end +        end + +    end + +    describe "when passed GB 18030 data" do + +        it "should correctly convert it to UTF-8 if unlabelled" do + +            converted = convert_string_to_utf8_or_binary gb_18030_spam_string + +            converted.should start_with("贵公司负责人") + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'UTF-8' +            end +        end + +    end + +end diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 48c32e2bc..272b56d0b 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -20,12 +20,46 @@ describe 'when creating a mail object from raw data' do          mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"]      end +    it 'should return nil for malformed To: and Cc: lines' do +        mail = get_fixture_mail('malformed-to-and-cc.email') +        mail.to.should == nil +        mail.cc.should == nil +    end +      it 'should convert an iso8859 email to utf8' do          mail = get_fixture_mail('iso8859_2_raw_email.email') -        mail.subject.should have_text(/gjatë/u) +        mail.subject.should match /gjatë/u          MailHandler.get_part_body(mail).is_utf8?.should == true      end +    it 'should not be confused by subject lines with malformed UTF-8 at the end' do +        # The base64 subject line was generated with: +        #   printf "hello\360" | base64 +        # ... and wrapping the result in '=?UTF-8?B?' and '?=' +        mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email') +        mail.subject.should == 'hello' +        # The quoted printable subject line was generated with: +        #   printf "hello\360" | qprint -b -e +        # ... and wrapping the result in '=?UTF-8?Q?' and '?=' +        mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email') +        mail.subject.should == 'hello' +    end + +    it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do +        mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') +        body = MailHandler.get_part_body(mail) +        body.is_utf8?.should == true +        # This email is broken in at least these two ways: +        #  1. It contains a top bit set character (0x96) despite the +        #     "Content-Transfer-Encoding: 7bit" +        #  2. The charset in the Content-Type header is "iso-8859-1" +        #     but 0x96 is actually a Windows-1252 en dash, which would +        #     be Unicode codepoint 2013.  It should be possible to +        #     spot the mislabelling, since 0x96 isn't a valid +        #     ISO-8859-1 character. +        body.should match(/ \xe2\x80\x93 /) +    end +  end  describe 'when asked for the from name' do @@ -275,6 +309,12 @@ end  describe 'when getting attachment attributes' do +    it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do +        mail = get_fixture_mail('part-without-charset-in-content-type.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.size.should == 2 +    end +      it 'should get two attachment parts from a multipart mail with text and html alternatives      and an image' do          mail = get_fixture_mail('quoted-subject-iso8859-1.email') @@ -282,6 +322,13 @@ describe 'when getting attachment attributes' do          attributes.size.should == 2      end +    it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do +        mail = get_fixture_mail('iso8859_2_raw_email.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 1 +        attributes[0][:body].is_utf8?.should == true +    end +      it 'should expand a mail attached as text' do          # Note that this spec will only pass using Tmail in the timezone set as datetime headers          # are rendered out in the local time - using the Mail gem this is not necessary @@ -304,6 +351,52 @@ describe 'when getting attachment attributes' do          attributes = MailHandler.get_attachment_attributes(mail)      end +    it 'should ignore truncated TNEF attachment' do +        mail = get_fixture_mail('tnef-attachment-truncated.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 2 +    end + +    it 'should ignore anything beyond the final MIME boundary' do +        pending do +            # This example raw email has a premature closing boundary for +            # the outer multipart/mixed - my reading of RFC 1521 is that +            # the "epilogue" beyond that should be ignored. +            # See https://github.com/mysociety/alaveteli/issues/922 for +            # more discussion. +            mail = get_fixture_mail('nested-attachments-premature-end.email') +            attributes = MailHandler.get_attachment_attributes(mail) +            attributes.length.should == 3 +        end +    end + +    it 'should cope with a missing final MIME boundary' do +        mail = get_fixture_mail('multipart-no-final-boundary.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 1 +        attributes[0][:body].should match(/This is an acknowledgement of your email/) +        attributes[0][:content_type].should == "text/plain" +        attributes[0][:url_part_number].should == 1 +    end + +    it 'should ignore a TNEF attachment with no usable contents' do +        # FIXME: "no usable contents" is slightly misleading.  The +        # attachment in this example email does have usable content in +        # the body of the TNEF attachment, but the invocation of tnef +        # historically used to unpack these attachments doesn't add +        # the --save-body parameter, so that they have been ignored so +        # far.  We probably should include the body from such +        # attachments, but, at the moment, with the pending upgrade to +        # Rails 3, we just want to check that the behaviour is the +        # same as before. +        mail = get_fixture_mail('tnef-attachment-empty.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 2 +        # This is the size of the TNEF-encoded attachment; currently, +        # we expect the code just to return this without decoding: +        attributes[1][:body].length.should == 7769 +    end +      it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects          and filenames from an example mail with lots of attachments' do          mail = get_fixture_mail('many-attachments-date-header.email') @@ -385,3 +478,11 @@ describe 'when getting attachment attributes' do          end      end  end + +describe 'when getting the address part from an address string' do + +    it 'should handle non-ascii characters in the name input' do +        address = "\"Someone’s name\" <test@example.com>" +        MailHandler.address_from_string(address).should == 'test@example.com' +    end +end diff --git a/spec/lib/sendmail_return_path_spec.rb b/spec/lib/sendmail_return_path_spec.rb index 137869b6e..83436c2bd 100644 --- a/spec/lib/sendmail_return_path_spec.rb +++ b/spec/lib/sendmail_return_path_spec.rb @@ -1,5 +1,10 @@  # This is a test of the monkey patches in sendmail_return_path.rb +# In Rails 3 the monkeypatches are not needed anymore because sendmail now has the "-f" flag +# set correctly. So, strictly these tests are testing the Rails internals. So, that means we really +# should delete them. Let's do that later when things have settled down. For the time being leave +# them in +  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')  describe "when sending email with an altered return path" do @@ -28,10 +33,10 @@ describe "when sending email with an altered return path" do          Net::SMTP.stub!(:new).and_return(mock_smtp)          with_delivery_method :smtp do -            ContactMailer.deliver_to_admin_message( +            ContactMailer.to_admin_message(                  "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",                  "This is just a test for a test script", nil, nil, nil -            ) +            ).deliver          end          deliveries = ActionMailer::Base.deliveries @@ -40,12 +45,12 @@ describe "when sending email with an altered return path" do      it "should set the return path when sending email using sendmail" do          with_stub_popen do -            IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost"', "w+") +            IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost" postmaster@localhost', "w+")              with_delivery_method :sendmail do -                ContactMailer.deliver_to_admin_message( +                ContactMailer.to_admin_message(                      "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",                      "This is just a test for a test script", nil, nil, nil -                ) +                ).deliver              end          end diff --git a/spec/lib/timezone_fixes_spec.rb b/spec/lib/timezone_fixes_spec.rb index 9d6ade526..8a9a3bf31 100644 --- a/spec/lib/timezone_fixes_spec.rb +++ b/spec/lib/timezone_fixes_spec.rb @@ -3,6 +3,11 @@  # We use MailServerLogDone here just as a totally random model that has a datetime type.  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') +# In Rails 3 the monkeypatch that these tests are testing is not necessary. So, +# since these tests are testing the Rails internals you could argue that they shouldn't +# be here. Well, you're right. But let's leave them in for the time being until the upgrade is finished. +# Then, we should probably delete this whole file +  describe "when doing things with timezones" do    it "should preserve time objects with local time conversion to default timezone UTC  | 
