diff options
| author | Louise Crow <louise.crow@gmail.com> | 2013-05-28 15:14:44 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2013-05-28 15:14:44 +0100 | 
| commit | ef682f7d7c3402713efea88775b246c3c7960aa2 (patch) | |
| tree | e4918a75cf1a24eecaab287602cf561245b611fc | |
| parent | 988becbb62e42feb9b62af27cdb2ec67b409032a (diff) | |
| parent | c248356a8e8a13513827381977b24f7406f96a8c (diff) | |
Merge branch 'rails-3-develop' of ssh://git.mysociety.org/data/git/public/alaveteli into rails-3-develop
Conflicts:
    Gemfile.lock
30 files changed, 1254 insertions, 79 deletions
| @@ -10,6 +10,7 @@ source 'https://rubygems.org'  gem 'rails', '3.1.12'  gem 'pg' +gem 'charlock_holmes'  gem 'fastercsv', '>=1.5.5'  gem 'json'  gem 'mahoro' diff --git a/Gemfile.lock b/Gemfile.lock index 1864dd4fd..24e4dd5e3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -58,6 +58,7 @@ GEM        net-sftp (>= 2.0.0)        net-ssh (>= 2.0.14)        net-ssh-gateway (>= 1.1.0) +    charlock_holmes (0.6.9.4)      chunky_png (1.2.8)      colorize (0.5.8)      columnize (0.3.6) @@ -243,6 +244,7 @@ DEPENDENCIES    annotate    bootstrap-sass    capistrano +  charlock_holmes    compass    coveralls    debugger diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index a0f88096e..b8ccdf926 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -5,7 +5,6 @@  # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.  # Email: hello@mysociety.org; WWW: http://www.mysociety.org/ -require 'alaveteli_file_types'  require 'zip/zip'  require 'open-uri' @@ -722,7 +721,7 @@ class RequestController < ApplicationController              yield -            if params[:skip_cache].nil? +            if params[:skip_cache].nil? && response.status == 200                  # write it to the fileystem ourselves, so is just a plain file. (The                  # various fragment cache functions using Ruby Marshall to write the file                  # which adds a header, so isnt compatible with images that have been @@ -737,6 +736,7 @@ class RequestController < ApplicationController      def get_attachment          get_attachment_internal(false) +        return unless @attachment          # Prevent spam to magic request address. 
Note that the binary          # subsitution method used depends on the content type @@ -756,6 +756,7 @@ class RequestController < ApplicationController              raise ActiveRecord::RecordNotFound.new("Attachment HTML not found.")          end          get_attachment_internal(true) +        return unless @attachment          # images made during conversion (e.g. images in PDF files) are put in the cache directory, so          # the same cache code in cache_attachments above will display them. @@ -802,8 +803,11 @@ class RequestController < ApplicationController          # check permissions          raise "internal error, pre-auth filter should have caught this" if !@info_request.user_can_view?(authenticated_user) -        @attachment = IncomingMessage.get_attachment_by_url_part_number(@incoming_message.get_attachments_for_display, @part_number) -        raise ActiveRecord::RecordNotFound.new("attachment not found part number " + @part_number.to_s + " incoming_message " + @incoming_message.id.to_s) if @attachment.nil? 
+        @attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(@incoming_message.get_attachments_for_display, @part_number, @original_filename) +        # If we can't find the right attachment, redirect to the incoming message: +        unless @attachment +            return redirect_to incoming_message_url(@incoming_message), :status => 303 +        end          # check filename in URL matches that in database (use a censor rule if you want to change a filename)          raise ActiveRecord::RecordNotFound.new("please use same filename as original file has, display: '" + @attachment.display_filename + "' old_display: '" + @attachment.old_display_filename + "' original: '" + @original_filename + "'") if @attachment.display_filename != @original_filename && @attachment.old_display_filename != @original_filename diff --git a/app/mailers/request_mailer.rb b/app/mailers/request_mailer.rb index 3eb89c660..4dbce6738 100644 --- a/app/mailers/request_mailer.rb +++ b/app/mailers/request_mailer.rb @@ -4,8 +4,6 @@  # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.  
# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ -require 'alaveteli_file_types' -  class RequestMailer < ApplicationMailer      # Used when an FOI officer uploads a response from their web browser - this is      # the "fake" email used to store in the same format in the database as if they diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index fcde379e0..0340f2b83 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -71,7 +71,12 @@ class FoiAttachment < ActiveRecord::Base              tries = 0              delay = 1              begin -                @cached_body = File.open(self.filepath, "rb" ).read +                binary_data = File.open(self.filepath, "rb" ).read +                if self.content_type =~ /^text/ +                    @cached_body = convert_string_to_utf8_or_binary(binary_data, 'UTF-8') +                else +                    @cached_body = binary_data +                end              rescue Errno::ENOENT                  # we've lost our cached attachments for some reason.  Reparse them.                  if tries > BODY_MAX_TRIES diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index c914edb7e..252f81bb7 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -31,12 +31,9 @@  # Move some of the (e.g. quoting) functions here into rblib, as they feel  # general not specific to IncomingMessage. -require 'alaveteli_file_types'  require 'htmlentities'  require 'rexml/document'  require 'zip/zip' -require 'mapi/msg' -require 'mapi/convert'  require 'iconv' unless RUBY_VERSION >= '1.9'  class IncomingMessage < ActiveRecord::Base @@ -132,6 +129,7 @@ class IncomingMessage < ActiveRecord::Base                  end                  self.valid_to_reply_to = self._calculate_valid_to_reply_to                  self.last_parsed = Time.now +                self.foi_attachments reload=true                  self.save!              
end          end @@ -173,15 +171,29 @@ class IncomingMessage < ActiveRecord::Base          super      end -    # And look up by URL part number to get an attachment +    # And look up by URL part number and display filename to get an attachment      # XXX relies on extract_attachments calling MailHandler.ensure_parts_counted -    def self.get_attachment_by_url_part_number(attachments, found_url_part_number) -        attachments.each do |a| -            if a.url_part_number == found_url_part_number -                return a +    # The filename here is passed from the URL parameter, so it's the +    # display_filename rather than the real filename. +    def self.get_attachment_by_url_part_number_and_filename(attachments, found_url_part_number, display_filename) +        attachment_by_part_number = attachments.detect { |a| a.url_part_number == found_url_part_number } +        if attachment_by_part_number && attachment_by_part_number.display_filename == display_filename +            # Then the filename matches, which is fine: +            attachment_by_part_number +        else +            # Otherwise if the URL part number and filename don't +            # match - this is probably due to a reparsing of the +            # email.  In that case, try to find a unique matching +            # filename from any attachment. 
+            attachments_by_filename = attachments.select { |a| +                a.display_filename == display_filename +            } +            if attachments_by_filename.length == 1 +                attachments_by_filename[0] +            else +                nil              end          end -        return nil      end      # Converts email addresses we know about into textual descriptions of them diff --git a/config/initializers/alaveteli.rb b/config/initializers/alaveteli.rb index 4acc126d0..a9e9d498d 100644 --- a/config/initializers/alaveteli.rb +++ b/config/initializers/alaveteli.rb @@ -59,6 +59,8 @@ require 'quiet_opener.rb'  require 'mail_handler'  require 'public_body_categories'  require 'ability' +require 'normalize_string' +require 'alaveteli_file_types'  # Allow tests to be run under a non-superuser database account if required  if Rails.env == 'test' and ActiveRecord::Base.configurations['test']['constraint_disabling'] == false diff --git a/config/packages b/config/packages index db51e5bdd..fc67cda6b 100644 --- a/config/packages +++ b/config/packages @@ -36,4 +36,5 @@ rake (>= 0.9.2.2)  build-essential  bundler  sqlite3 -libsqlite3-dev
\ No newline at end of file +libsqlite3-dev +libicu-dev diff --git a/doc/THEMES-UPGRADE.md b/doc/THEMES-UPGRADE.md new file mode 100644 index 000000000..457274d7a --- /dev/null +++ b/doc/THEMES-UPGRADE.md @@ -0,0 +1,101 @@ +This file contains some notes on changing your Alaveteli theme for the +upgrade to Rails 3, in version 0.11 of Alaveteli.  These were written +by Henare Degan, with some additions by Mark Longair. + +# Alaveteli Theme Upgrade Checks + +## RAILS_ROOT/RAILS_ENV + +[Example](https://github.com/henare/adminbootstraptheme/commit/857e33c9b0bc577024b476404aec4f9749f65a0b) + +Check your theme for instances of: + +* `RAILS_ROOT` and replace it with `Rails.root` +* `RAILS_ENV` and replace it with `Rails.env` + +Note that `Rails.root` is a `Pathname`, so you can replace, for +example: + +    File.join(RAILS_ROOT, 'public', 'alavetelitheme') + +... with: + +    Rails.root.join('public', 'alavetelitheme') + +## Dispatcher + +[Example](https://github.com/henare/adminbootstraptheme/commit/fba2d6b7dfdc26a25fdc1596bfe120270dd4cd0d) + +This... + +```ruby +require 'dispatcher' +Dispatcher.to_prepare do +``` + +should be replaced with this... + +```ruby +Rails.configuration.to_prepare do +```` + +## Routes + +[Example](https://github.com/henare/adminbootstraptheme/commit/87f1991dafb09401f9b17f642a94382d5a47a713) + +You need to upgrade your custom routes to the new Rails syntax. 
+ +## list_public_bodies_default removed + +[Example](https://github.com/openaustralia/alavetelitheme/commit/5927877af996a1afb1a23a950f0d012b52c36f83) + +The list_public_bodies_default helper has been removed from Alaveteli + +## Patching mailer templates has changed + +[Example](https://github.com/openaustralia/alavetelitheme/commit/ffb5242973a0b2acc4981c25659fcb752b92eb97) + +In `lib/patch_mailer_paths.rb` change `ActionMailer::Base.view_paths.unshift File.join(File.dirname(__FILE__), "views")` to `ActionMailer::Base.prepend_view_path File.join(File.dirname(__FILE__), "views")` + +There's also `ActionMailer::Base.append_view_path` for replacing `ActionMailer::Base.view_paths <<`. + +## Rename view templates + +[Example](https://github.com/henare/adminbootstraptheme/commit/b616b636c283ae6cf696a6af1fa481f371baf2b6) + +Rename view templates from `filename.rhtml` to `filename.html.erb`. + +Run this in the root of your theme directory: + +    for r in $(find lib/views -name '*.rhtml'); do echo git mv $r ${r%.rhtml}.html.erb; done + +[GOTCHA!](https://github.com/openaustralia/alavetelitheme/commit/65e775488822367d981bb15ab2cbcf1fce842cc2) +One exception is mailer templates, these should be renamed to +`filename.text.erb` as we only use text emails. + +## The Configuration class has been renamed + +[Example](https://github.com/openaustralia/alavetelitheme/commit/db6cca4650216c6f85acffaea380727344f0f740) + +Due to a naming conflict, `Configuration` has been renamed to `AlaveteliConfiguration`. 
+ +You may have this in your theme for things like `Configuration::site_name`, just change it to `AlaveteliConfiguration::site_name` + +## request.request_uri is deprecated + +[Example](https://github.com/openaustralia/alavetelitheme/commit/d670eeebfb049e1dc83fdb36a628f7722d2ad419) + +Replace instances of `request.request_uri` with `request.fullpath` + +## content-inserting <% %> block helpers are deprecated + +[Example](https://github.com/openaustralia/alavetelitheme/commit/a4b13bbd76249b3a28e2a755cede20dd9db30140) + +The Rails 3 release notes are [irritatingly +imprecise](http://edgeguides.rubyonrails.org/3_0_release_notes.html#helpers-with-blocks) +about which such helpers have changed.  You can find some candidates +with this `git grep` command: + +    git grep -E '<%[^=].*(_for|_tag|link_to)\b' + +(Ignore `content_for` in those results.) diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index f7893a60d..03d78e0a3 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -1,4 +1,35 @@  require 'mail' +require 'mapi/msg' +require 'mapi/convert' + +module Mail +    class Message + +        # The behaviour of the 'to' and 'cc' methods have changed +        # between TMail and Mail; this monkey-patching restores the +        # TMail behaviour.  The key difference is that when there's an +        # invalid address, e.g. '<foo@example.org', Mail returns the +        # string as an ActiveSupport::Multibyte::Chars, whereas +        # previously TMail would return nil. + +        alias_method :old_to, :to +        alias_method :old_cc, :cc + +        def clean_addresses(old_method, val) +            old_result = self.send(old_method, val) +            old_result.class == Mail::AddressContainer ? 
old_result : nil +        end + +        def to(val = nil) +            self.clean_addresses :old_to, val +        end + +        def cc(val = nil) +            self.clean_addresses :old_cc, val +        end + +    end +end  module MailHandler      module Backends @@ -38,7 +69,11 @@ module MailHandler              # Get the body of a mail part              def get_part_body(part) -                part.body.decoded +                decoded = part.body.decoded +                if part.content_type =~ /^text\// +                    decoded = convert_string_to_utf8_or_binary decoded, part.charset +                end +                decoded              end              # Return the first from field if any @@ -141,9 +176,14 @@ module MailHandler                      end                  elsif get_content_type(part) == 'application/ms-tnef'                      # A set of attachments in a TNEF file -                    part.rfc822_attachment = mail_from_tnef(part.body.decoded) -                    if part.rfc822_attachment.nil? -                        # Attached mail didn't parse, so treat as binary +                    begin +                        part.rfc822_attachment = mail_from_tnef(part.body.decoded) +                        if part.rfc822_attachment.nil? 
+                            # Attached mail didn't parse, so treat as binary +                            part.content_type = 'application/octet-stream' +                        end +                    rescue TNEFParsingError +                        part.rfc822_attachment = nil                          part.content_type = 'application/octet-stream'                      end                  end @@ -160,8 +200,11 @@ module MailHandler                    part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) }                  else                    part_filename = get_part_file_name(part) -                  charset = part.charset # save this, because overwriting content_type also resets charset - +                  if part.has_charset? +                      original_charset = part.charset # save this, because overwriting content_type also resets charset +                  else +                      original_charset = nil +                  end                    # Don't allow nil content_types                    if get_content_type(part).nil?                        part.content_type = 'application/octet-stream' @@ -180,7 +223,9 @@ module MailHandler                    # Use standard content types for Word documents etc.                    part.content_type = normalise_content_type(get_content_type(part))                    decode_attached_part(part, parent_mail) -                  part.charset = charset +                  if original_charset +                      part.charset = original_charset +                  end                  end              end @@ -228,8 +273,15 @@ module MailHandler              def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail)                  leaves_found = []                  if part.multipart? 
-                    raise "no parts on multipart mail" if part.parts.size == 0 -                    if part.sub_type == 'alternative' +                    if part.parts.size == 0 +                        # This is typically caused by a missing final +                        # MIME boundary, in which case the text of the +                        # message (including the opening MIME +                        # boundary) is in part.body, so just add this +                        # part as a leaf and treat it as text/plain: +                        part.content_type = "text/plain" +                        leaves_found += [part] +                    elsif part.sub_type == 'alternative'                          best_part = choose_best_alternative(part)                          leaves_found += _get_attachment_leaves_recursive(best_part,                                                                           within_rfc822_attachment, diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb index 22ba26b97..9c955cccd 100644 --- a/lib/mail_handler/mail_handler.rb +++ b/lib/mail_handler/mail_handler.rb @@ -8,20 +8,23 @@ module MailHandler      require 'backends/mail_backend'      include Backends::MailBackend +    class TNEFParsingError < StandardError +    end +      # Returns a set of attachments from the given TNEF contents      # The TNEF contents also contains the message body, but in general this is the      # same as the message body in the message proper.      def tnef_attachments(content)          attachments = []          Dir.mktmpdir do |dir| -            IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f| +            IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f|                  f.write(content)                  f.close                  if $?.signaled?                      raise IOError, "tnef exited with signal #{$?.termsig}"                  end                  if $?.exited? 
&& $?.exitstatus != 0 -                    raise IOError, "tnef exited with status #{$?.exitstatus}" +                    raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}"                  end              end              found = 0 @@ -34,7 +37,7 @@ module MailHandler                  end              end              if found == 0 -                raise IOError, "tnef produced no attachments" +                raise TNEFParsingError, "tnef produced no attachments"              end          end          attachments diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb new file mode 100644 index 000000000..f02b18ee0 --- /dev/null +++ b/lib/normalize_string.rb @@ -0,0 +1,86 @@ +require 'iconv' unless RUBY_VERSION.to_f >= 1.9 +require 'charlock_holmes' + +class EncodingNormalizationError < StandardError +end + +def normalize_string_to_utf8(s, suggested_character_encoding=nil) + +    # Make a list of encodings to try: +    to_try = [] + +    guessed_encoding = CharlockHolmes::EncodingDetector.detect(s)[:encoding] +    guessed_encoding ||= '' + +    # It's reasonably common for windows-1252 text to be mislabelled +    # as ISO-8859-1, so try that first if charlock_holmes guessed +    # that.  However, it can also easily misidentify UTF-8 strings as +    # ISO-8859-1 so we don't want to go with the guess by default... +    to_try.push guessed_encoding if guessed_encoding.downcase == 'windows-1252' + +    to_try.push suggested_character_encoding if suggested_character_encoding +    to_try.push 'UTF-8' +    to_try.push guessed_encoding + +    to_try.each do |from_encoding| +        if RUBY_VERSION.to_f >= 1.9 +            begin +                s.force_encoding from_encoding +                return s.encode('UTF-8') if s.valid_encoding? 
+            rescue ArgumentError +                # We get this is there are invalid bytes when +                # interpreted as from_encoding at the point of +                # the encode('UTF-8'); move onto the next one... +            end +        else +            to_encoding = 'UTF-8' +            begin +                converted = Iconv.conv 'UTF-8', from_encoding, s +                return converted +            rescue Iconv::Failure +                # We get this is there are invalid bytes when +                # interpreted as from_encoding at the point of +                # the Iconv.iconv; move onto the next one... +            end +        end +    end +    raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string" + +end + +def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) +    # This function exists to help to keep consistent with the +    # behaviour of earlier versions of Alaveteli: in the code as it +    # is, there are situations where it's expected that we generally +    # have a UTF-8 encoded string, but if the source data was +    # unintepretable under any character encoding, the string may be +    # binary data (i.e. invalid UTF-8).  Such a string would then be +    # mangled into valid UTF-8 by _sanitize_text for the purposes of +    # display. + +    # This seems unsatisfactory to me - two better alternatives would +    # be either: (a) to mangle the data into valid UTF-8 in this +    # method or (b) to treat the 'text/*' attachment as +    # 'application/octet-stream' instead.  However, for the purposes +    # of the transition to Ruby 1.9 and/or Rails 3 we just want the +    # behaviour to be as similar as possible. 
+ +    begin +        result = normalize_string_to_utf8 s, suggested_character_encoding +    rescue EncodingNormalizationError +        result = s +        s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9 +    end +    result +end + +def log_text_details(message, text) +    if RUBY_VERSION.to_f >= 1.9 +        STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" +    else +        STDERR.puts "#{message}, we have text: #{text}, of class #{text.class}" +    end +    filename = "/var/tmp/#{Digest::MD5.hexdigest(text)}.txt" +    File.open(filename, "wb") { |f| f.write text } +    STDERR.puts "#{message}, the filename is: #{filename}" +end diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index e49a84ecb..f0085b5e1 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -50,4 +50,154 @@ namespace :temp do          end      end +    desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests' +    task :random_attachments_hexdigests => :environment do + +        # The idea is to run this under the Rail 2 codebase, where +        # Tmail was used to extract the attachements, and the task +        # will output all of those file paths in a CSV file, and a +        # list of the raw email files in another.  
The latter file is +        # useful so that one can easily tar up the emails with: +        # +        #   tar cvz -T raw-email-files -f raw_emails.tar.gz +        # +        # Then you can switch to the Rails 3 codebase, where +        # attachment parsing is done via +        # recompute_attachments_hexdigests + +        require 'csv' + +        File.open('raw-email-files', 'w') do |f| +            CSV.open('attachment-hexdigests.csv', 'w') do |csv| +                csv << ['filepath', 'i', 'url_part_number', 'hexdigest'] +                IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message| +                    # raw_email.filepath fails unless the +                    # incoming_message has an associated request +                    next unless incoming_message.info_request +                    raw_email = incoming_message.raw_email +                    f.puts raw_email.filepath +                    incoming_message.foi_attachments.each_with_index do |attachment, i| +                        csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest] +                    end +                end +            end +        end + +    end + + +    desc 'Check the hexdigests of attachments in emails on disk' +    task :recompute_attachments_hexdigests => :environment do + +        require 'csv' +        require 'digest/md5' + +        OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest + +        filename_to_attachments = Hash.new {|h,k| h[k] = []} + +        header_line = true +        CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest| +            if header_line +                header_line = false +            else +                filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest +            end +        end + +        total_attachments = 0 +        
attachments_with_different_hexdigest = 0 +        files_with_different_numbers_of_attachments = 0 +        no_tnef_attachments = 0 +        no_parts_in_multipart = 0 + +        multipart_error = "no parts on multipart mail" +        tnef_error = "tnef produced no attachments" + +        # Now check each file: +        filename_to_attachments.each do |filename, old_attachments| + +            # Currently it doesn't seem to be possible to reuse the +            # attachment parsing code in Alaveteli without saving +            # objects to the database, so reproduce what it does: + +            raw_email = nil +            File.open(filename) do |f| +                raw_email = f.read +            end +            mail = MailHandler.mail_from_raw_email(raw_email) + +            begin +                attachment_attributes = MailHandler.get_attachment_attributes(mail) +            rescue IOError => e +                if e.message == tnef_error +                    puts "#{filename} #{tnef_error}" +                    no_tnef_attachments += 1 +                    next +                else +                    raise +                end +            rescue Exception => e +                if e.message == multipart_error +                    puts "#{filename} #{multipart_error}" +                    no_parts_in_multipart += 1 +                    next +                else +                    raise +                end +            end + +            if attachment_attributes.length != old_attachments.length +                puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}" +                files_with_different_numbers_of_attachments += 1 +            else +                old_attachments.each_with_index do |old_attachment, i| +                    total_attachments += 1 +                    attrs = attachment_attributes[i] +                    old_hexdigest = 
old_attachment.hexdigest +                    new_hexdigest = attrs[:hexdigest] +                    new_content_type = attrs[:content_type] +                    old_url_part_number = old_attachment.url_part_number.to_i +                    new_url_part_number = attrs[:url_part_number] +                    if old_url_part_number != new_url_part_number +                        puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}" +                    end +                    if old_hexdigest != new_hexdigest +                        body = attrs[:body] +                        # First, if the content type is one of +                        # text/plain, text/html or application/rtf try +                        # changing CRLF to LF and calculating a new +                        # digest - we generally don't worry about +                        # these changes: +                        new_converted_hexdigest = nil +                        if ["text/plain", "text/html", "application/rtf"].include? new_content_type +                            converted_body = body.gsub /\r\n/, "\n" +                            new_converted_hexdigest = Digest::MD5.hexdigest converted_body +                            puts "new_converted_hexdigest is #{new_converted_hexdigest}" +                        end +                        if (! 
new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest) +                            puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}" +                            puts "  body was of length #{body.length}" +                            puts "  content type was: #{new_content_type}" +                            path = "/tmp/#{new_hexdigest}" +                            f = File.new path, "w" +                            f.write body +                            f.close +                            puts "  wrote body to #{path}" +                            attachments_with_different_hexdigest += 1 +                        end +                    end +                end +            end + +        end + +        puts "total_attachments: #{total_attachments}" +        puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}" +        puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}" +        puts "no_tnef_attachments: #{no_tnef_attachments}" +        puts "no_parts_in_multipart: #{no_parts_in_multipart}" + +    end +  end diff --git a/spec/controllers/api_controller_spec.rb b/spec/controllers/api_controller_spec.rb index 749be9f85..66b8e33f0 100644 --- a/spec/controllers/api_controller_spec.rb +++ b/spec/controllers/api_controller_spec.rb @@ -259,7 +259,7 @@ describe ApiController, "when using the API" do          attachments.size.should == 1          attachment = attachments[0]          attachment.filename.should == "tfl.pdf" -        attachment.body.should == load_file_fixture("tfl.pdf", as_binary=true) +        attachment.body.should == load_file_fixture("tfl.pdf")      end      it "should show information about a request" do diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 657837c72..9cc60a103 100644 --- a/spec/controllers/request_controller_spec.rb +++ 
b/spec/controllers/request_controller_spec.rb @@ -477,11 +477,11 @@ describe RequestController, "when showing one request" do              (assigns[:info_request_events].size - size_before).should == 1              ir.reload -            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 +            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1              response.content_type.should == "text/plain"              response.should contain "Second hello" -            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello.txt', :skip_cache => 1 +            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello world.txt', :skip_cache => 1              response.content_type.should == "text/plain"              response.should contain "First hello"          end @@ -494,7 +494,7 @@ describe RequestController, "when showing one request" do              get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id,                                   :id => ir.id,                                   :part => 2, -                                 :file_name => 'hello.txt' +                                 :file_name => 'hello world.txt'          end          it "should convert message body to UTF8" do @@ -508,7 +508,7 @@ describe RequestController, "when showing one request" do              ir = info_requests(:fancy_dog_request)              receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)              ir.reload -            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +            get 
:get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              response.content_type.should == "text/html"              response.should contain "Second hello"          end @@ -529,11 +529,11 @@ describe RequestController, "when showing one request" do              ir.reload              ugly_id = "55195"              lambda { -                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              }.should raise_error(ActiveRecord::RecordNotFound)              lambda { -                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 +                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1              }.should raise_error(ActiveRecord::RecordNotFound)          end          it "should return 404 when incoming message and request ids don't match" do @@ -542,7 +542,7 @@ describe RequestController, "when showing one request" do              receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)              ir.reload              lambda { -                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              }.should 
raise_error(ActiveRecord::RecordNotFound)          end          it "should return 404 for ugly URLs contain a request id that isn't an integer, even if the integer prefix refers to an actual request" do @@ -552,11 +552,11 @@ describe RequestController, "when showing one request" do              ugly_id = "%d95" % [info_requests(:naughty_chicken_request).id]              lambda { -                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              }.should raise_error(ActiveRecord::RecordNotFound)              lambda { -                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 +                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1              }.should raise_error(ActiveRecord::RecordNotFound)          end          it "should return 404 when incoming message and request ids don't match" do @@ -565,7 +565,7 @@ describe RequestController, "when showing one request" do              receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)              ir.reload              lambda { -                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              }.should raise_error(ActiveRecord::RecordNotFound)          end @@ -573,44 +573,66 @@ describe 
RequestController, "when showing one request" do              ir = info_requests(:fancy_dog_request)              receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email)              ir.reload -            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs_50379341.pdf.html', :skip_cache => 1 +            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs 50379341.pdf.html', :skip_cache => 1              response.content_type.should == "text/html"              response.should contain "Walberswick Parish Council"          end -        it "should not cause a reparsing of the raw email, even when the result would be a 404" do +        it "should not cause a reparsing of the raw email, even when the attachment can't be found" do              ir = info_requests(:fancy_dog_request)              receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)              ir.reload -            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) +            attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')              attachment.body.should contain "Second hello"              # change the raw_email associated with the message; this only be reparsed when explicitly asked for              ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third") -            # asking for an attachment by the wrong filename results -            # in a 404 for browsing users.  This shouldn't cause a -            # re-parse... 
-            lambda { -                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.baz.html', :skip_cache => 1 -            }.should raise_error(ActiveRecord::RecordNotFound) +            # asking for an attachment by the wrong filename should result in redirecting +            # back to the incoming message, but shouldn't cause a reparse: +            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.baz.html', :skip_cache => 1 +            response.status.should == 303 -            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) +            attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')              attachment.body.should contain "Second hello"              # ...nor should asking for it by its correct filename... 
-            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              response.should_not contain "Third hello"              # ...but if we explicitly ask for attachments to be extracted, then they should be              force = true              ir.incoming_messages[1].parse_raw_email!(force)              ir.reload -            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) +            attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')              attachment.body.should contain "Third hello" -            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 +            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1              response.should contain "Third hello"          end +        it "should redirect to the incoming message if there's a wrong part number and an ambiguous filename" do +            ir = info_requests(:fancy_dog_request) +            receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) +            ir.reload + +            im = ir.incoming_messages[1] + +            attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(im.get_attachments_for_display, 5, 'hello world.txt') +            attachment.should be_nil + +            get :get_attachment_as_html, :incoming_message_id => im.id, :id => ir.id, :part => 5, :file_name => 'hello 
world.txt', :skip_cache => 1 +            response.status.should == 303 +            new_location = response.header['Location'] +            new_location.should match(/request\/#{ir.url_title}#incoming-#{im.id}/) +        end + +        it "should find a uniquely named filename even if the URL part number was wrong" do +            ir = info_requests(:fancy_dog_request) +            receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email) +            ir.reload +            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 5, :file_name => 'fs 50379341.pdf', :skip_cache => 1 +            response.content_type.should == "application/pdf" +        end +          it "should treat attachments with unknown extensions as binary" do              ir = info_requests(:fancy_dog_request)              receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email) @@ -625,10 +647,8 @@ describe RequestController, "when showing one request" do              ir = info_requests(:fancy_dog_request)              receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) -            lambda { -                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, -                    :file_name => 'http://trying.to.hack' -            }.should raise_error(ActiveRecord::RecordNotFound) +            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'http://trying.to.hack' +            response.status.should == 303          end          it "should censor attachments downloaded as binary" do @@ -644,7 +664,7 @@ describe RequestController, "when showing one request" do              begin                  receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) -                get :get_attachment, :incoming_message_id => 
ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 +                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1                  response.content_type.should == "text/plain"                  response.should contain "xxxxxx hello"              ensure @@ -666,7 +686,7 @@ describe RequestController, "when showing one request" do                  receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)                  ir.reload -                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 +                get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1                  response.content_type.should == "text/plain"                  response.should contain "xxxxxx hello"              ensure @@ -695,11 +715,13 @@ describe RequestController, "when showing one request" do              # so at this point, assigns[:info_request].incoming_messages[1].get_attachments_for_display is returning stuff, but the equivalent thing in the template isn't.              
# but something odd is that the above is return a whole load of attachments which aren't there in the controller              response.body.should have_selector("p.attachment strong") do |s| -                s.should contain /hello.txt/m +                s.should contain /hello world.txt/m              end              censor_rule = CensorRule.new() -            censor_rule.text = "hello.txt" +            # Note that the censor rule applies to the original filename, +            # not the display_filename: +            censor_rule.text = "hello-world.txt"              censor_rule.replacement = "goodbye.txt"              censor_rule.last_edit_editor = "unknown"              censor_rule.last_edit_comment = "none" @@ -743,7 +765,7 @@ describe RequestController, "when showing one request" do                  old_path = assigns[:url_path]                  response.location.should contain /#{assigns[:url_path]}$/                  zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile| -                    zipfile.count.should == 3 # the message plus two "hello.txt" files +                    zipfile.count.should == 3 # the message plus two "hello-world.txt" files                  }                  # The path of the zip file is based on the hash of the timestamp of the last request @@ -756,7 +778,7 @@ describe RequestController, "when showing one request" do                  assigns[:url_path].should_not == old_path                  response.location.should contain assigns[:url_path]                  zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", assigns[:url_path])) { |zipfile| -                    zipfile.count.should == 4 # the message, two hello.txt plus the unknown attachment +                    zipfile.count.should == 4 # the message, two hello-world.txt plus the unknown attachment                  }              end @@ -875,7 +897,7 @@ describe RequestController, "when changing 
prominence of a request" do              get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id,                                        :id => ir.id,                                        :part => 2, -                                      :file_name => 'hello.txt' +                                      :file_name => 'hello world.txt'          end.should raise_error(ActiveRecord::RecordNotFound)      end @@ -890,7 +912,7 @@ describe RequestController, "when changing prominence of a request" do              get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id,                                        :id => ir.id,                                        :part => 2, -                                      :file_name => 'hello.txt' +                                      :file_name => 'hello world.txt'          end.should raise_error(ActiveRecord::RecordNotFound)      end @@ -2394,7 +2416,7 @@ describe RequestController, "when caching fragments" do          attachment = mock(FoiAttachment, :display_filename => long_name,                                           :body_as_html => ['some text', 'wrapper'])          IncomingMessage.stub!(:find).with("44").and_return(incoming_message) -        IncomingMessage.stub!(:get_attachment_by_url_part_number).and_return(attachment) +        IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment)          InfoRequest.stub!(:find).with("132").and_return(info_request)          params = { :file_name => long_name,                     :controller => "request", diff --git a/spec/fixtures/files/incoming-request-two-same-name.email b/spec/fixtures/files/incoming-request-two-same-name.email index f1024d607..ecd322fe4 100644 --- a/spec/fixtures/files/incoming-request-two-same-name.email +++ b/spec/fixtures/files/incoming-request-two-same-name.email @@ -13,13 +13,13 @@ Content-Disposition: inline  --Q68bSM7Ycu6FN28Q  Content-Type: text/plain; charset=us-ascii 
-Content-Disposition: attachment; filename="hello.txt" +Content-Disposition: attachment; filename="hello-world.txt"  Second hello  --Q68bSM7Ycu6FN28Q  Content-Type: text/plain; charset=us-ascii -Content-Disposition: attachment; filename="hello.txt" +Content-Disposition: attachment; filename="hello-world.txt"  First hello diff --git a/spec/fixtures/files/inline-uuencode.email b/spec/fixtures/files/inline-uuencode.email new file mode 100644 index 000000000..3134ba3ad --- /dev/null +++ b/spec/fixtures/files/inline-uuencode.email @@ -0,0 +1,27 @@ +From foo@bar Mon Jun 01 17:14:44 2009 +Return-path: <foo@bar> +Envelope-to: foi@quux +Delivery-date: Mon, 01 Jun 2009 17:14:44 +0100 +From: <foo@bar> +To: <request-whatever@quux> +Subject: something or other +Date: Mon, 1 Jun 2009 17:14:37 +0100 +X-MimeOLE: Produced By Microsoft MimeOLE V6.00.3790.181 +Message-ID: <baz@xyzzy> + +Thanks for your email - here's a truncated attachment +for you: + +********************************************************************** + +begin 666 ResponseT7363 9.doc +MT,\1X*&Q&N$`````````````````````/@`#`/[_"0`&```````````````" +M````) ``````````$ ``+@````$```#^____`````",```!L````________ +M```````````````````````````````````````````````````````````` +M```````````````````````````````````````````````````````````` +#```` +` +end + +The original of this email was scanned for viruses or something +like that. 
diff --git a/spec/fixtures/files/malformed-to-and-cc.email b/spec/fixtures/files/malformed-to-and-cc.email new file mode 100644 index 000000000..4fbb6e21e --- /dev/null +++ b/spec/fixtures/files/malformed-to-and-cc.email @@ -0,0 +1,11 @@ +From foo@bar Wed Mar 12 14:58:26 2008 +Return-path: <foo@bar> +Subject: example email +To: <bar@example.org +Cc: baz@example.org> +From: quux@example.org +Date: Mon, 7 May 2012 12:47:06 +0100 +Mime-Version: 1.0 +Content-Type: text/plain; charset=utf-8 + +A very basic email, but with malformed To: and Cc: lines diff --git a/spec/fixtures/files/mislabelled-as-iso-8859-1.email b/spec/fixtures/files/mislabelled-as-iso-8859-1.email new file mode 100644 index 000000000..6c8e6109e --- /dev/null +++ b/spec/fixtures/files/mislabelled-as-iso-8859-1.email @@ -0,0 +1,20 @@ +From foo@bar Thu Mar 01 15:02:33 2012 +Return-path: <foo@bar> +Envelope-to: foi@quux +Delivery-date: Thu, 01 Mar 2012 15:02:33 +0000 +Date: Thu, 01 Mar 2012 15:01:58 +0000 +Subject: some FOI request +To: foi@quux +From: foo@bar +MIME-Version: 1.0 +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: 7bit +Message-Id: <2468@bar.local> + +Dear Whoever, + +THERE'S A DASH NEXT  REQUEST FOR INFORMATION + +Best regards, +Other Person + diff --git a/spec/fixtures/files/multipart-no-final-boundary.email b/spec/fixtures/files/multipart-no-final-boundary.email new file mode 100644 index 000000000..9c16dad52 --- /dev/null +++ b/spec/fixtures/files/multipart-no-final-boundary.email @@ -0,0 +1,21 @@ +From foo@bar Thu Sep 13 10:34:44 2012 +Return-path: <foo@bar> +Envelope-to: foi@example.org +Delivery-date: Thu, 13 Sep 2012 10:34:44 +0100 +From: foo@bar +To: foi@example.org +Subject: an acknowledgement email +Date: Thu, 13 Sep 2012 10:08:03 +0100 +Message-ID: <987654@foo.local> +Content-Type: multipart/mixed; boundary="-----7D81B75CCC90D2974F7A1CBD" + +This is a multi-part message in MIME format. 
+-------7D81B75CCC90D2974F7A1CBD +Content-Type: text/html + +<div> +  <p> +    This is an acknowledgement of your email, that irritatingly +    leaves out the final MIME boundary. +  </p> +<div> diff --git a/spec/fixtures/files/nested-attachments-premature-end.email b/spec/fixtures/files/nested-attachments-premature-end.email new file mode 100644 index 000000000..6b13808dc --- /dev/null +++ b/spec/fixtures/files/nested-attachments-premature-end.email @@ -0,0 +1,110 @@ +From someone@example.org Mon May 15 13:10:29 2012 +Return-path: <someone@example.org> +Envelope-to: foi@example.org +Delivery-date: Mon, 15 May 2012 13:10:29 +0100 +Message-Id: <abcde@baz.local> +Date: Mon, 15 May 2012 09:48:48 +0100 +From: "Example Person" <someone@example.org> +To: <request@example.org> +Subject: some FOI request or other +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="=__outer__=" + +This is a MIME message. If you are reading this text, you may want to +consider changing to a mail reader or gateway that understands how to +properly handle MIME multipart messages. + +--=__outer__= +Content-Type: multipart/alternative; boundary="=__inner__=" + +--=__inner__= +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: quoted-printable +X-MIME-Autoconverted: from 8bit to quoted-printable by something + +Hello +=20 +Please find some information attached. 
+=20 + +--=__inner__= +Content-Description: HTML +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +<html> +  <head> +    <title>some title text</title> +  </head> +  <body> +    <p>blah blah blah</p> +  </body> +</html> + +--=__inner__=-- + +--=__outer__= +Content-Type: message/rfc822 + +Return-path: <foo@bar> +Date: Mon, 7 May 2012 12:47:06 +0100 +From: someone-else@example.org +To: foi@example.org +Message-Id: <56789@quux.local> +Subject: a freedom of information requests +Mime-Version: 1.0 +Content-Type: text/plain; charset=utf-8 + +     Dear Whoever, + +     Please could you let me know, um, whatever ... + +     Yours faithfully, + +     Whoever I Am + +--=__outer__= +Content-Type: text/plain; charset=US-ASCII +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +     Dear Whowever, +    =20 +     Please could you let me know, um, whatever ... +    =20 +     Yours faithfully, +    =20 +     Whoever I Am +    =20 + +--=__outer__=-- + +--=__outer__= +Content-Type: application/png; name="maroon-square.png" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="maroon-square.png" + +iVBORw0KGgoAAAANSUhEUgAAAEEAAABCCAYAAAAIY7vrAAAABmJLR0QA/wD/AP+g +vaeTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH3QQeDSEx8qultwAAABl0 +RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAMzSURBVHja7VtL2psw +DNS4rPv1Gj1Kt71Az9ZT9F7dN9MFGGThB/YfKDX2Kp8DRBpLowcKvn/5ShERiAgl +srh8aT93tJzWdae8XR0CEICwUx59K54H4QFKp0Eg5alrAwEYIDx5DRAGCAOEAcIA +QaUFfDoIHJawpEbOPd0dRPjJDWIUiEwt933+8es2Ovz++a3dCkREXmwD4ZbsVln6 +cLkef14duAMqAGCkY0A+jBNgXGFZU/eKa3fhZjlQqLhHKF9oFbpulE2Z/oFrXTd+ +nlOWkn1dMHXrAiWguq0iG9uk/REjBggPtgQOED781my4wwBhgDBAmPmUAwR0X0UO +dxggnA8CO5xocU8HoAoEDwA6nOyCH+ZMKQ4zy+QbNBoUirquMPBJcgPyJkOi+c7S +ohhn6ZctzDIrcFalIspYILG1et9WABUtt6WztLq+/0Amp9sCnsCBUhfvK4FLiRCA +QwC7JABGTngrIIPnIjf6R5We0uxz3j+FbCvdy2nlY/IgcfrMRQuFHIC9Sap3AW8n +2gZ+cZYCVn4LzBxxnykNgJpWN8lt7yw+QCMxan2s8lQXcNlDlpAW7YmIXMszTgoH 
+rU91+8OFYXN9ikz/LyLgExSCDlaO+cdGsIEQkyUAIgFMKRTEn3vDjFFHwWSIzEQC +cmN4IHVNGG2PQXhhsuRl3jihwQyB6H1274gV1BhKLKNt4ZEpkygeeoC+xytdK1cr +oX0EACphnTZXbbLMmL/YBGo9lSU1OmBONMnTlQUqTa4y1VgAddg0hdTR04lyT0Xq +8RYAyHVyBX6ET/9wTBD6TWVCMH5Qo3yhXju3bNY/BBMdsoLYBMmnzQdOP56O36s5 +40r1D7UWYV5dNT2nbxVBAHb43Y36CdbXfTii6isU/U7ZXLQ4w/V/wotFoilVF2kl +w7YCDrIPkj4/G9fao7q0rYSSJdgeSqmQrCU+r/j8rOv/gpuKPm5Lffen5eN+ljeo +rcfW0Om2Enm9KwDZAgrG98txX9cMe6X2E5SGU29VTE17lFAUkMybsXclndu31BGX +hcgWv8oxonYtkf/jhc10WPGgm2IZncKlu+sg8vLm7hDSwk3f2/wFEzN3v6aAXQ0A +AAAASUVORK5CYII= + +--=__outer__=-- + diff --git a/spec/fixtures/files/no-part-charset-random-data.email b/spec/fixtures/files/no-part-charset-random-data.email new file mode 100644 index 000000000..d51fd3f38 --- /dev/null +++ b/spec/fixtures/files/no-part-charset-random-data.email @@ -0,0 +1,30 @@ +From xxxx@yahoo.cn Mon Oct 08 14:01:34 2012 +Return-path: <xxxx@yahoo.cn> +Envelope-to: foi@atlas.ukcod.org.uk +Delivery-date: Mon, 08 Oct 2012 14:01:34 +0100 +Received: (qmail 63864 invoked from network); 8 Oct 2012 13:01:12 -0000 +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yahoo.cn; s=s1024; t=1349701272; bh=T/mtlIYvhB/L5RO+CvTazeAdGf1n1zsGXBoA8EKGT9M=; h=Message-ID:X-Yahoo-Newman-Property:X-YMail-OSG:X-Yahoo-SMTP:Received:X-mailer:From:Subject:To:Content-Transfer-Encoding:Content-Type:Date; b=LYI/PXvA7DA746bmyprChUg7N8YDvN9XE/bhfTt5MW7siOmxHHzn1w+s5X33PvLI0x0UfJLo+MCkTnGPKnG5BYY38US8PkocJYyphrvF/eaUl3ALf8UvxHBOJX1iIi89Xp2NnfbS8lz9kZAWifb9GOnOA5/kLDcL5/WJXliit2k= +Message-ID: <xxxx@xxxx.yahoo.com> +X-Yahoo-Newman-Property: ymail-5 +X-YMail-OSG: nPs5jgsVM1myUoKjeEPTxxalz4BM6BZMEUYu.E8NPMPQyo_ + Yej8T2WCTurn767NOwhuDIqNxC2QGZINqfjmKcdyW7a1P_Zxqr9GsjgxODci + ihwr7qYAGDDbcsrB.PX4epnJZHl3yAwoGW.1ReEZnXQANFcNep7.zNEbZ_2k + RU1IhI9aHYvxPxt5RWugwOoFRh9P8Ym35A88IMazNtVaBiBEXF6Vk8Aqr9XP + 3Vh9xOT9Pn6X8qOUjNXkdb3xB4S5AAIRSE9mqhL1KzHBwdVQs25IoM_2FV2b + gPsQGgL4_mwBH0WcEMhdj7Kn6Nfb44L.50E_V3DH.8P7KzDK8zNVXSbAqohX + 
Qi6MzUK2frr8IyZyYzHb.ekff7kAcJgUoHvhnyPar8tRYxhQT3_xsUTzsx8N + oWckVPh_i3OT7U4ObgekqgtteMoYqPH2eF1SZXamGBAs- +X-Yahoo-SMTP: YUQHwRWswBDjbw_M.D6EP4KpT9khlJErDRBQi4ySZQ-- +X-mailer: MIME::Lite 3.027 (F2.74; T1.31; A2.07; B3.13; Q3.13) +From: =?GB2312?B?zsJKaWFu?= Bing <xxxx@yahoo.cn> +Subject: =?GB2312?B?yM7A1svJ?= +To: FOI Person <EMAIL_TO> +Content-Transfer-Encoding: base64 +Content-Type: text/plain +Date: Tue, 9 Oct 2012 20:53:06 +0800 + +HPBSqsndNBX+ER4hyBoPhhnclcWKVFgbevdD5cJvfI/ARbxRYqA28hZ49Pf6A/ks +NdVh4N5VPgRs/7SHYPfw5625pZJYTLj6nVdYk76sxnjiiAmwCJWGjPoWvO7nHUBv +fuLXtNVq5HmD0bWWjAbSk2n74PW7v5izbNO2fjHyiyX2CIof0rriXDmOldJqoebO +ejybrjG+Tahpu3FF1Mw98HfswzkdB46u/izLCzdUQVM= + diff --git a/spec/fixtures/files/part-without-charset-in-content-type.email b/spec/fixtures/files/part-without-charset-in-content-type.email new file mode 100644 index 000000000..439d52cc3 --- /dev/null +++ b/spec/fixtures/files/part-without-charset-in-content-type.email @@ -0,0 +1,38 @@ +From example@example.com Wed Sep 15 17:55:40 2010 +Return-path: <example@example.com> +Envelope-to: example@example.com +Delivery-date: Wed, 15 Sep 2010 17:55:40 +0100 +From: <example@example.com> +To: <request-xxxxx@whatdotheyknow.com> +Date: Wed, 15 Sep 2010 17:56:03 +0100 +Subject: FOI Internal Review response +Thread-Topic: FOI Internal Review response +Thread-Index: xxxxx +Message-ID: <xxxxxx> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: yes +X-MS-TNEF-Correlator: +acceptlanguage: en-US, en-GB +Content-Type: multipart/mixed; +    boundary="_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_" +MIME-Version: 1.0 + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +someencodedtext= + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_ +Content-Type: document/pdf; name="document.pdf" +Content-Description: document.pdf +Content-Disposition: attachment; 
filename="document.pdf"; +    size=62103; creation-date="Wed, 15 Sep 2010 17:54:27 GMT"; +    modification-date="Wed, 15 Sep 2010 17:54:27 GMT" +Content-Transfer-Encoding: base64 + +somemoreencodedtext= + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_-- + diff --git a/spec/fixtures/files/tnef-attachment-empty.email b/spec/fixtures/files/tnef-attachment-empty.email new file mode 100644 index 000000000..7967aa95b --- /dev/null +++ b/spec/fixtures/files/tnef-attachment-empty.email @@ -0,0 +1,196 @@ +From hello@blah.local Fri Feb 21 16:23:14 2013 +Return-path: <bar@example.org> +Envelope-to: foo@example.org +Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000 +Content-Type: multipart/mixed; +    boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_" +From: <bar@example.org> +To: <foo@example.org> +Sender: <hello@blah.local> +Date: Fri, 21 Feb 2013 16:23:04 +0000 +Subject: here's a useless email +Message-ID: <12345@blah.local> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: <12345@blah.local> +acceptlanguage: en-US, en-GB +MIME-Version: 1.0 + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: quoted-printable + +This attachment just has a body from one of the tests +in the tnef package in Debian. 
+ +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Disposition: attachment; filename="winmail.dat" +Content-Transfer-Encoding: base64 +Content-Type: application/ms-tnef; name="winmail.dat" + +eJ8+IiURAQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAENgAQAAgAA +AAIAAgABBYADAA4AAADVBwQAGQAKAA8AIwABADYBASCAAwAOAAAA1QcEABkACgAP +ACQAAQA3AQEJgAEAIQAAADBEREEwRkNCQ0MwN0MxNDE5MkVFODZGQzQyRDE1Qjk1 +AGYHAQSQBgBkAgAAAQAAAA8AAAAfAAEwAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA +HwACMAEAAAAGAAAARQBYAAAAAAAfAAMwAQAAAI4AAAAvAE8APQBCAFIALQBFAFgA +QwBIAC0AVABFAFMAVAAvAE8AVQA9AEYASQBSAFMAVAAgAEEARABNAEkATgBJAFMA +VABSAEEAVABJAFYARQAgAEcAUgBPAFUAUAAvAEMATgA9AFIARQBDAEkAUABJAEUA +TgBUAFMALwBDAE4APQAzAGsAdQBzAGUAcgAyAAAAAAADAAAwAAAAAAMA/18AAAAA +AwAVDAEAAAACAQswAQAAAEoAAABFWDovTz1CUi1FWENILVRFU1QvT1U9RklSU1Qg +QURNSU5JU1RSQVRJVkUgR1JPVVAvQ049UkVDSVBJRU5UUy9DTj0zS1VTRVIyAAAA +HwAgOgEAAAAQAAAAMwBrAHUAcwBlAHIAMgAAAAMA/V8BAAAACwBAOgAA+T8CAfdf +AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAAAAAAL289QlItRVhDSC1U +RVNUL291PUZpcnN0IEFkbWluaXN0cmF0aXZlIEdyb3VwL2NuPVJlY2lwaWVudHMv +Y249M2t1c2VyMgAAAwAAOQAAAAAfAP45AQAAAEoAAAAzAGsAdQBzAGUAcgAyAEAA +YgByAGUAeABjAGgAYQBuAGcAZQAuAGQAbwBsAHAAaABpAG4AcwBlAGEAcgBjAGgA +LgBjAG8AbQAAAAAAAwBxOgAAAAAfAPZfAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA +m2sBA5AGAEwbAAAzAAAACwACAAEAAAAfABoAAQAAABIAAABJAFAATQAuAE4AbwB0 +AGUAAAAAAAMAJgAAAAAAAwA2AAAAAAAfADcAAQAAAB4AAABCAGkAbABsACAAbwBm +ACAAUgBpAGcAaAB0AHMAAAAAAEAAOQBgQvtkuknFAR8APQABAAAAAgAAAAAAAAAC +AUcAAQAAADgAAABjPXVzO2E9IDtwPUJSLUVYQ0gtVEVTVDtsPUJSLUVYQ0gtREVW +MS0wNTA0MjUxNzE1MzZaLTE0AB8AcAABAAAAHgAAAEIAaQBsAGwAIABvAGYAIABS +AGkAZwBoAHQAcwAAAAAAAgFxAAEAAAAWAAAAAcVJumT7yarjal9+TnmqsNvwaipi +/QAAHwAaDAEAAAAQAAAAMwBrAHIAZQBsAGEAeQAAAB8AHQ4BAAAAHgAAAEIAaQBs +AGwAIABvAGYAIABSAGkAZwBoAHQAcwAAAAAAAgETEAEAAADuFAAAPCFET0NUWVBF +IEhUTUwgUFVCTElDICItLy9XM0MvL0RURCBIVE1MIDQuMCBUcmFuc2l0aW9uYWwv +L0VOIj4NCjxIVE1MPjxIRUFEPg0KPE1FVEEgaHR0cC1lcXVpdj1Db250ZW50LVR5 +cGUgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PXVzLWFzY2lpIj4NCjxNRVRB 
+IGNvbnRlbnQ9Ik1TSFRNTCA2LjAwLjM3OTAuMTgzMCIgbmFtZT1HRU5FUkFUT1I+ +PC9IRUFEPg0KPEJPRFk+DQo8RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNp +emU9Mj5USEUgQklMTCBPRiBSSUdIVFM8QlI+QW1lbmRtZW50cyAxLTEwIG9mIHRo +ZSANCkNvbnN0aXR1dGlvbjwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+ +DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBDb252ZW50aW9ucyBv +ZiBhIG51bWJlciBvZiB0aGUgU3RhdGVzIGhhdmluZywgDQphdCB0aGUgdGltZSBv +ZiBhZG9wdGluZyB0aGUgQ29uc3RpdHV0aW9uLCBleHByZXNzZWQgYSBkZXNpcmUs +IGluIG9yZGVyIHRvIA0KcHJldmVudCBtaXNjb25zdHJ1Y3Rpb24gb3IgYWJ1c2Ug +b2YgaXRzIHBvd2VycywgdGhhdCBmdXJ0aGVyIGRlY2xhcmF0b3J5IGFuZCANCnJl +c3RyaWN0aXZlIGNsYXVzZXMgc2hvdWxkIGJlIGFkZGVkLCBhbmQgYXMgZXh0ZW5k +aW5nIHRoZSBncm91bmQgb2YgcHVibGljIA0KY29uZmlkZW5jZSBpbiB0aGUgR292 +ZXJubWVudCB3aWxsIGJlc3QgaW5zdXJlIHRoZSBiZW5lZmljZW50IGVuZHMgb2Yg +aXRzIA0KaW5zdGl0dXRpb247IDxCUj5SZXNvbHZlZCwgYnkgdGhlIFNlbmF0ZSBh +bmQgSG91c2Ugb2YgUmVwcmVzZW50YXRpdmVzIG9mIHRoZSANClVuaXRlZCBTdGF0 +ZXMgb2YgQW1lcmljYSwgaW4gQ29uZ3Jlc3MgYXNzZW1ibGVkLCB0d28tdGhpcmRz +IG9mIGJvdGggSG91c2VzIA0KY29uY3VycmluZywgdGhhdCB0aGUgZm9sbG93aW5n +IGFydGljbGVzIGJlIHByb3Bvc2VkIHRvIHRoZSBMZWdpc2xhdHVyZXMgb2YgdGhl +IA0Kc2V2ZXJhbCBTdGF0ZXMsIGFzIGFtZW5kbWVudHMgdG8gdGhlIENvbnN0aXR1 +dGlvbiBvZiB0aGUgVW5pdGVkIFN0YXRlczsgYWxsIG9yIA0KYW55IG9mIHdoaWNo +IGFydGljbGVzLCB3aGVuIHJhdGlmaWVkIGJ5IHRocmVlLWZvdXJ0aHMgb2YgdGhl +IHNhaWQgTGVnaXNsYXR1cmVzLCANCnRvIGJlIHZhbGlkIHRvIGFsbCBpbnRlbnRz +IGFuZCBwdXJwb3NlcyBhcyBwYXJ0IG9mIHRoZSBzYWlkIENvbnN0aXR1dGlvbiwg +DQpuYW1lbHk6IDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElW +PjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBJPC9GT05UPjwvRElW +Pg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXpl +PTI+Q29uZ3Jlc3Mgc2hhbGwgbWFrZSBubyBsYXcgcmVzcGVjdGluZyBhbiANCmVz +dGFibGlzaG1lbnQgb2YgcmVsaWdpb24sIG9yIHByb2hpYml0aW5nIHRoZSBmcmVl +IGV4ZXJjaXNlIHRoZXJlb2Y7IG9yIA0KYWJyaWRnaW5nIHRoZSBmcmVlZG9tIG9m +IHNwZWVjaCwgb3Igb2YgdGhlIHByZXNzOyBvciB0aGUgcmlnaHQgb2YgdGhlIHBl +b3BsZSANCnBlYWNlYWJseSB0byBhc3NlbWJsZSwgYW5kIHRvIHBldGl0aW9uIHRo 
+ZSBnb3Zlcm5tZW50IGZvciBhIHJlZHJlc3Mgb2YgDQpncmlldmFuY2VzLiA8L0ZP +TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy +aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUk8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNw +OzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BIHdlbGwgcmVn +dWxhdGVkIG1pbGl0aWEsIGJlaW5nIG5lY2Vzc2FyeSB0byB0aGUgDQpzZWN1cml0 +eSBvZiBhIGZyZWUgc3RhdGUsIHRoZSByaWdodCBvZiB0aGUgcGVvcGxlIHRvIGtl +ZXAgYW5kIGJlYXIgYXJtcywgc2hhbGwgDQpub3QgYmUgaW5mcmluZ2VkLiA8L0ZP +TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy +aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUlJPC9GT05UPjwvRElWPg0KPERJVj4mbmJz +cDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+Tm8gc29sZGll +ciBzaGFsbCwgaW4gdGltZSBvZiBwZWFjZSBiZSBxdWFydGVyZWQgaW4gDQphbnkg +aG91c2UsIHdpdGhvdXQgdGhlIGNvbnNlbnQgb2YgdGhlIG93bmVyLCBub3IgaW4g +dGltZSBvZiB3YXIsIGJ1dCBpbiBhIG1hbm5lciANCnRvIGJlIHByZXNjcmliZWQg +YnkgbGF3LiA8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48 +Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgSVY8L0ZPTlQ+PC9ESVY+ +DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9 +Mj5UaGUgcmlnaHQgb2YgdGhlIHBlb3BsZSB0byBiZSBzZWN1cmUgaW4gdGhlaXIg +DQpwZXJzb25zLCBob3VzZXMsIHBhcGVycywgYW5kIGVmZmVjdHMsIGFnYWluc3Qg +dW5yZWFzb25hYmxlIHNlYXJjaGVzIGFuZCANCnNlaXp1cmVzLCBzaGFsbCBub3Qg +YmUgdmlvbGF0ZWQsIGFuZCBubyB3YXJyYW50cyBzaGFsbCBpc3N1ZSwgYnV0IHVw +b24gcHJvYmFibGUgDQpjYXVzZSwgc3VwcG9ydGVkIGJ5IG9hdGggb3IgYWZmaXJt +YXRpb24sIGFuZCBwYXJ0aWN1bGFybHkgZGVzY3JpYmluZyB0aGUgcGxhY2UgDQp0 +byBiZSBzZWFyY2hlZCwgYW5kIHRoZSBwZXJzb25zIG9yIHRoaW5ncyB0byBiZSBz +ZWl6ZWQuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG +T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBWPC9GT05UPjwvRElWPg0K +PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+ +Tm8gcGVyc29uIHNoYWxsIGJlIGhlbGQgdG8gYW5zd2VyIGZvciBhIGNhcGl0YWws +IG9yIA0Kb3RoZXJ3aXNlIGluZmFtb3VzIGNyaW1lLCB1bmxlc3Mgb24gYSBwcmVz +ZW50bWVudCBvciBpbmRpY3RtZW50IG9mIGEgZ3JhbmQganVyeSwgDQpleGNlcHQg +aW4gY2FzZXMgYXJpc2luZyBpbiB0aGUgbGFuZCBvciBuYXZhbCBmb3JjZXMsIG9y 
+IGluIHRoZSBtaWxpdGlhLCB3aGVuIGluIA0KYWN0dWFsIHNlcnZpY2UgaW4gdGlt +ZSBvZiB3YXIgb3IgcHVibGljIGRhbmdlcjsgbm9yIHNoYWxsIGFueSBwZXJzb24g +YmUgc3ViamVjdCANCmZvciB0aGUgc2FtZSBvZmZlbnNlIHRvIGJlIHR3aWNlIHB1 +dCBpbiBqZW9wYXJkeSBvZiBsaWZlIG9yIGxpbWI7IG5vciBzaGFsbCBiZSANCmNv +bXBlbGxlZCBpbiBhbnkgY3JpbWluYWwgY2FzZSB0byBiZSBhIHdpdG5lc3MgYWdh +aW5zdCBoaW1zZWxmLCBub3IgYmUgZGVwcml2ZWQgDQpvZiBsaWZlLCBsaWJlcnR5 +LCBvciBwcm9wZXJ0eSwgd2l0aG91dCBkdWUgcHJvY2VzcyBvZiBsYXc7IG5vciBz +aGFsbCBwcml2YXRlIA0KcHJvcGVydHkgYmUgdGFrZW4gZm9yIHB1YmxpYyB1c2Us +IHdpdGhvdXQganVzdCBjb21wZW5zYXRpb24uIDwvRk9OVD48L0RJVj4NCjxESVY+ +Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5k +bWVudCBWSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG +T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIGFsbCBjcmltaW5hbCBwcm9zZWN1dGlv +bnMsIHRoZSBhY2N1c2VkIHNoYWxsIA0KZW5qb3kgdGhlIHJpZ2h0IHRvIGEgc3Bl +ZWR5IGFuZCBwdWJsaWMgdHJpYWwsIGJ5IGFuIGltcGFydGlhbCBqdXJ5IG9mIHRo +ZSBzdGF0ZSANCmFuZCBkaXN0cmljdCB3aGVyZWluIHRoZSBjcmltZSBzaGFsbCBo +YXZlIGJlZW4gY29tbWl0dGVkLCB3aGljaCBkaXN0cmljdCBzaGFsbCANCmhhdmUg +YmVlbiBwcmV2aW91c2x5IGFzY2VydGFpbmVkIGJ5IGxhdywgYW5kIHRvIGJlIGlu +Zm9ybWVkIG9mIHRoZSBuYXR1cmUgYW5kIA0KY2F1c2Ugb2YgdGhlIGFjY3VzYXRp +b247IHRvIGJlIGNvbmZyb250ZWQgd2l0aCB0aGUgd2l0bmVzc2VzIGFnYWluc3Qg +aGltOyB0byANCmhhdmUgY29tcHVsc29yeSBwcm9jZXNzIGZvciBvYnRhaW5pbmcg +d2l0bmVzc2VzIGluIGhpcyBmYXZvciwgYW5kIHRvIGhhdmUgdGhlIA0KYXNzaXN0 +YW5jZSBvZiBjb3Vuc2VsIGZvciBoaXMgZGVmZW5zZS4gPC9GT05UPjwvRElWPg0K +PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+ +QW1lbmRtZW50IFZJSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8 +RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIHN1aXRzIGF0IGNvbW1vbiBs +YXcsIHdoZXJlIHRoZSB2YWx1ZSBpbiANCmNvbnRyb3ZlcnN5IHNoYWxsIGV4Y2Vl +ZCB0d2VudHkgZG9sbGFycywgdGhlIHJpZ2h0IG9mIHRyaWFsIGJ5IGp1cnkgc2hh +bGwgYmUgDQpwcmVzZXJ2ZWQsIGFuZCBubyBmYWN0IHRyaWVkIGJ5IGEganVyeSwg +c2hhbGwgYmUgb3RoZXJ3aXNlIHJlZXhhbWluZWQgaW4gYW55IA0KY291cnQgb2Yg +dGhlIFVuaXRlZCBTdGF0ZXMsIHRoYW4gYWNjb3JkaW5nIHRvIHRoZSBydWxlcyBv 
+ZiB0aGUgY29tbW9uIGxhdy4gDQo8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwv +RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgVklJ +STwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZh +Y2U9QXJpYWwgc2l6ZT0yPkV4Y2Vzc2l2ZSBiYWlsIHNoYWxsIG5vdCBiZSByZXF1 +aXJlZCwgbm9yIGV4Y2Vzc2l2ZSANCmZpbmVzIGltcG9zZWQsIG5vciBjcnVlbCBh +bmQgdW51c3VhbCBwdW5pc2htZW50cyBpbmZsaWN0ZWQuIDwvRk9OVD48L0RJVj4N +CjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0y +PkFtZW5kbWVudCBJWDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8 +RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBlbnVtZXJhdGlvbiBpbiB0 +aGUgQ29uc3RpdHV0aW9uLCBvZiBjZXJ0YWluIA0KcmlnaHRzLCBzaGFsbCBub3Qg +YmUgY29uc3RydWVkIHRvIGRlbnkgb3IgZGlzcGFyYWdlIG90aGVycyByZXRhaW5l +ZCBieSB0aGUgDQpwZW9wbGUuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9E +SVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBYPC9G +T05UPjwvRElWPg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1B +cmlhbCBzaXplPTI+VGhlIHBvd2VycyBub3QgZGVsZWdhdGVkIHRvIHRoZSBVbml0 +ZWQgU3RhdGVzIGJ5IA0KdGhlIENvbnN0aXR1dGlvbiwgbm9yIHByb2hpYml0ZWQg +YnkgaXQgdG8gdGhlIHN0YXRlcywgYXJlIHJlc2VydmVkIHRvIHRoZSBzdGF0ZXMg +DQpyZXNwZWN0aXZlbHksIG9yIHRvIHRoZSBwZW9wbGUuIDwvRk9OVD48L0RJVj48 +L0RJVj48L0JPRFk+PC9IVE1MPg0KAAAfADUQAQAAAKIAAAA8ADQANQAyADAARgA2 +ADEANQAxAEQAQQBGADIAQQA0ADQAQgBBADgANwA4AEIARgAyAEYAMwA4ADAAMwA0 +ADgARQAyADYARQA1AEAAYgByAC0AZQB4AGMAaAAtAGQAZQB2ADEALgBiAHIAZQB4 +AGMAaABhAG4AZwBlAC4AZABvAGwAcABoAGkAbgBzAGUAYQByAGMAaAAuAGMAbwBt +AD4AAAAAAAMAgBD/////HwDzEAEAAAAmAAAAQgBpAGwAbAAgAG8AZgAgAFIAaQBn +AGgAdABzAC4ARQBNAEwAAAAAAAsA9BAAAAAACwD1EAAAAAALAPYQAAAAAEAABzBR +lpFluknFAUAACDBRlpFluknFAQMA3j+fTgAAAwDxPwkEAAAfAPg/AQAAABAAAAAz +AGsAcgBlAGwAYQB5AAAAAgH5PwEAAABjAAAAAAAAANynQMjAQhAatLkIACsv4YIB +AAAAAAAAAC9PPUJSLUVYQ0gtVEVTVC9PVT1GSVJTVCBBRE1JTklTVFJBVElWRSBH +Uk9VUC9DTj1SRUNJUElFTlRTL0NOPTNLUkVMQVkAAB8A+j8BAAAAEAAAADMAawBy +AGUAbABhAHkAAAACAfs/AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAA +AAAAL089QlItRVhDSC1URVNUL09VPUZJUlNUIEFETUlOSVNUUkFUSVZFIEdST1VQ 
+L0NOPVJFQ0lQSUVOVFMvQ049M0tSRUxBWQAAAwD9P+QEAAADABlAAAAAAAMAGkAA +AAAAHwAwQAEAAAAQAAAAMwBLAFIARQBMAEEAWQAAAB8AMUABAAAAEAAAADMASwBS +AEUATABBAFkAAAAfADhAAQAAABAAAAAzAEsAUgBFAEwAQQBZAAAAHwA5QAEAAAAQ +AAAAMwBLAFIARQBMAEEAWQAAAAMAdkD/////AwACWQAAFgADAAlZAgAAAAsAhYEI +IAYAAAAAAMAAAAAAAABGAAAAAA6FAAAAAAAAAwCdgQggBgAAAAAAwAAAAAAAAEYA +AAAAUoUAAJjDAQAfAJ6BCCAGAAAAAADAAAAAAAAARgAAAABUhQAAAQAAAAoAAAAx +ADEALgAwAAAAAAADAOmBCCAGAAAAAADAAAAAAAAARgAAAAABhQAAAAAAAAsA7oEI +IAYAAAAAAMAAAAAAAABGAAAAAAOFAAAAAAAAAwD4gQggBgAAAAAAwAAAAAAAAEYA +AAAAEIUAAAAAAAADAP+BCCAGAAAAAADAAAAAAAAARgAAAAAYhQAAAAAAAAsAIIII +IAYAAAAAAMAAAAAAAABGAAAAAAaFAAAAAAAACwAkggggBgAAAAAAwAAAAAAAAEYA +AAAAgoUAAAAAAAAfACaCCCAGAAAAAADAAAAAAAAARgAAAACDhQAAAQAAACYAAAA0 +ADAANQAxADMAMQA1ADEANwAtADIANQAwADQAMgAwADAANQAAAAAAAwBxggggBgAA +AAAAwAAAAAAAAEYAAAAAk4UAAAAAAAALACkAAAAAAAsAIwAAAAAAAgF/AAEAAABR +AAAAPDQ1MjBGNjE1MURBRjJBNDRCQTg3OEJGMkYzODAzNDhFMjZFNUBici1leGNo +LWRldjEuYnJleGNoYW5nZS5kb2xwaGluc2VhcmNoLmNvbT4AAAAAC/o= + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_-- + diff --git a/spec/fixtures/files/tnef-attachment-truncated.email b/spec/fixtures/files/tnef-attachment-truncated.email new file mode 100644 index 000000000..365a5a442 --- /dev/null +++ b/spec/fixtures/files/tnef-attachment-truncated.email @@ -0,0 +1,34 @@ +From hello@blah.local Fri Feb 21 16:23:14 2013 +Return-path: <bar@example.org> +Envelope-to: foo@example.org +Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000 +Content-Type: multipart/mixed; +    boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_" +From: <bar@example.org> +To: <foo@example.org> +Sender: <hello@blah.local> +Date: Fri, 21 Feb 2013 16:23:04 +0000 +Subject: here's a useless email +Message-ID: <12345@blah.local> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: <12345@blah.local> +acceptlanguage: en-US, en-GB +MIME-Version: 1.0 + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ 
+Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: quoted-printable + +Some introductory text here, before the malformed TNEF attachment. + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Disposition: attachment; filename="winmail.dat" +Content-Transfer-Encoding: base64 +Content-Type: application/ms-tnef; name="winmail.dat" + +eJ8+IkV9AQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAEJgAEAIQAAAEMyRUUzRUYx + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_-- + diff --git a/spec/lib/basic_encoding_tests.rb b/spec/lib/basic_encoding_tests.rb new file mode 100644 index 000000000..35d35fd4a --- /dev/null +++ b/spec/lib/basic_encoding_tests.rb @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') + +def bytes_to_binary_string( bytes, claimed_encoding = nil ) +    claimed_encoding ||= 'ASCII-8BIT' +    bytes_string = bytes.pack('c*') +    if RUBY_VERSION.to_f >= 1.9 +        bytes_string.force_encoding! 
claimed_encoding +    end +    bytes_string +end + +random_string = bytes_to_binary_string [ 0x0f, 0x58, 0x1c, 0x8f, 0xa4, 0xcf, +                                         0xf6, 0x8c, 0x9d, 0xa7, 0x06, 0xd9, +                                         0xf7, 0x90, 0x6c, 0x6f] + +windows_1252_string = bytes_to_binary_string [ 0x44, 0x41, 0x53, 0x48, 0x20, +                                               0x96, 0x20, 0x44, 0x41, 0x53, +                                               0x48 ] + +# It's a shame this example is so long, but if we don't take enough it +# gets misinterpreted as Shift_JIS + +gb_18030_bytes = [ 0xb9, 0xf3, 0xb9, 0xab, 0xcb, 0xbe, 0xb8, 0xba, 0xd4, 0xf0, +                   0xc8, 0xcb, 0x28, 0xbe, 0xad, 0xc0, 0xed, 0x2f, 0xb2, 0xc6, +                   0xce, 0xf1, 0x29, 0xc4, 0xfa, 0xba, 0xc3, 0xa3, 0xba, 0x0d, +                   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0xb1, 0xbe, 0xb9, 0xab, 0xcb, 0xbe, 0xd4, +                   0xda, 0x31, 0x39, 0x39, 0x37, 0xc4, 0xea, 0xb3, 0xc9, 0xc1, +                   0xa2, 0xb9, 0xfa, 0xbc, 0xd2, 0xb9, 0xa4, 0xc9, 0xcc, 0xd7, +                   0xa2, 0xb2, 0xe1, 0x2e, 0xca, 0xb5, 0xc1, 0xa6, 0xd0, 0xdb, +                   0xba, 0xf1, 0xa1, 0xa3, 0xd3, 0xd0, 0xb6, 0xc0, 0xc1, 0xa2, +                   0xcb, 0xb0, 0xce, 0xf1, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd7, 0xa8, 0xd2, 0xb5, +                   0xc8, 0xcb, 0xd4, 0xb1, 0x3b, 0xd4, 0xda, 0xc8, 0xab, 0xb9, +                   0xfa, 0xb8, 0xf7, 0xb3, 0xc7, 0xca, 0xd0, 0xc9, 0xe8, 0xc1, +                   0xa2, 0xb7, 0xd6, 0xb9, 0xab, 0xcb, 0xbe, 0xa3, 0xa8, 0xd5, +                   0xe3, 0xbd, 0xad, 0xa1, 0xa2, 0xc9, 0xcf, 0xba, 0xa3, 0xa1, +                   0xa2, 0xb9, 0xe3, 0xd6, 0xdd, 0xa1, 0xa2, 0xbd, 0xad, 0xcb, +                   0xd5, 0xb5, 0xc8, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 
0x20, 0x20, 0xb5, 0xd8, 0xb7, 0xbd, 0xa3, +                   0xa9, 0xd2, 0xf2, 0xbd, 0xf8, 0xcf, 0xee, 0xbd, 0xcf, 0xb6, +                   0xe0, 0xcf, 0xd6, 0xcd, 0xea, 0xb3, 0xc9, 0xb2, 0xbb, 0xc1, +                   0xcb, 0xc3, 0xbf, 0xd4, 0xc2, 0xcf, 0xfa, 0xca, 0xdb, 0xb6, +                   0xee, 0xb6, 0xc8, 0xa1, 0xa3, 0xc3, 0xbf, 0xd4, 0xc2, 0xd3, +                   0xd0, 0xd2, 0xbb, 0xb2, 0xbf, 0xb7, 0xd6, 0x0d, 0x0a, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd4, +                   0xf6, 0xd6, 0xb5, 0xb6, 0x90, 0xa3, 0xa8, 0x36, 0x2d, 0x37, +                   0x25, 0xd7, 0xf3, 0xd3, 0xd2, 0x29, 0xba, 0xcd, 0xc6, 0xd5, +                   0xc6, 0xb1, 0xa3, 0xa8, 0x30, 0x2e, 0x35, 0x25, 0x2d, 0x32, +                   0x25, 0x20, 0xd7, 0xf3, 0xd3, 0xd2, 0xa3, 0xa9, 0xd3, 0xc5, +                   0xbb, 0xdd, 0xb4, 0xfa, 0xbf, 0xaa, 0xbb, 0xf2, 0xba, 0xcf, +                   0xd7, 0xf7, 0xa3, 0xac, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, +                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xe3, 0xca, 0xfd, +                   0xbd, 0xcf, 0xb5, 0xcd, 0xa1, 0xa3, 0xb4, 0xfa, 0xc0, 0xed, +                   0xb7, 0xb6, 0xce, 0xa7, 0xc8, 0xe7, 0xcf, 0xc2, 0xa3, 0xba, +                   0x0d, 0x0a ] + +gb_18030_spam_string = bytes_to_binary_string gb_18030_bytes + +describe "normalize_string_to_utf8" do + +    describe "when passed uniterpretable character data" do + +        it "should reject it as invalid" do + +            expect { +                normalize_string_to_utf8 random_string +            }.to raise_error(EncodingNormalizationError) + +            expect { +                normalize_string_to_utf8 random_string, 'UTF-8' +            }.to raise_error(EncodingNormalizationError) + +        end +    end + +    describe "when passed unlabelled Windows 1252 data" do + +        it "should correctly convert it to UTF-8" do + +            normalized = normalize_string_to_utf8 windows_1252_string + +      
      normalized.should ==  "DASH – DASH" + +        end + +    end + +    describe "when passed GB 18030 data" do + +        it "should correctly convert it to UTF-8 if unlabelled" do + +            normalized = normalize_string_to_utf8 gb_18030_spam_string + +            normalized.should start_with("贵公司负责人") + +        end + +    end + +end + +describe "convert_string_to_utf8_or_binary" do + +    describe "when passed uniterpretable character data" do + +        it "should return it as a binary string" do + +            converted = convert_string_to_utf8_or_binary random_string +            converted.should == random_string + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'ASCII-8BIT' +            end + +            converted = convert_string_to_utf8_or_binary random_string,'UTF-8' +            converted.should == random_string + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'ASCII-8BIT' +            end + +        end +    end + +    describe "when passed unlabelled Windows 1252 data" do + +        it "should correctly convert it to UTF-8" do + +            converted = convert_string_to_utf8_or_binary windows_1252_string + +            converted.should ==  "DASH – DASH" + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'UTF-8' +            end +        end + +    end + +    describe "when passed GB 18030 data" do + +        it "should correctly convert it to UTF-8 if unlabelled" do + +            converted = convert_string_to_utf8_or_binary gb_18030_spam_string + +            converted.should start_with("贵公司负责人") + +            if RUBY_VERSION.to_f >= 1.9 +                converted.encoding.should == 'UTF-8' +            end +        end + +    end + +end diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 79b779687..01bf179f8 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ 
b/spec/lib/mail_handler/mail_handler_spec.rb @@ -20,12 +20,33 @@ describe 'when creating a mail object from raw data' do          mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"]      end +    it 'should return nil for malformed To: and Cc: lines' do +        mail = get_fixture_mail('malformed-to-and-cc.email') +        mail.to.should == nil +        mail.cc.should == nil +    end +      it 'should convert an iso8859 email to utf8' do          mail = get_fixture_mail('iso8859_2_raw_email.email')          mail.subject.should match /gjatë/u          MailHandler.get_part_body(mail).is_utf8?.should == true      end +    it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do +        mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') +        body = MailHandler.get_part_body(mail) +        body.is_utf8?.should == true +        # This email is broken in at least these two ways: +        #  1. It contains a top bit set character (0x96) despite the +        #     "Content-Transfer-Encoding: 7bit" +        #  2. The charset in the Content-Type header is "iso-8859-1" +        #     but 0x96 is actually a Windows-1252 en dash, which would +        #     be Unicode codepoint 2013.  It should be possible to +        #     spot the mislabelling, since 0x96 isn't a valid +        #     ISO-8859-1 character. 
+        body.should match(/ \xe2\x80\x93 /) +    end +  end  describe 'when asked for the from name' do @@ -275,6 +296,12 @@ end  describe 'when getting attachment attributes' do +    it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do +        mail = get_fixture_mail('part-without-charset-in-content-type.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.size.should == 2 +    end +      it 'should get two attachment parts from a multipart mail with text and html alternatives      and an image' do          mail = get_fixture_mail('quoted-subject-iso8859-1.email') @@ -282,6 +309,13 @@ describe 'when getting attachment attributes' do          attributes.size.should == 2      end +    it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do +        mail = get_fixture_mail('iso8859_2_raw_email.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 1 +        attributes[0][:body].is_utf8?.should == true +    end +      it 'should expand a mail attached as text' do          # Note that this spec will only pass using Tmail in the timezone set as datetime headers          # are rendered out in the local time - using the Mail gem this is not necessary @@ -304,6 +338,52 @@ describe 'when getting attachment attributes' do          attributes = MailHandler.get_attachment_attributes(mail)      end +    it 'should ignore truncated TNEF attachment' do +        mail = get_fixture_mail('tnef-attachment-truncated.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 2 +    end + +    it 'should ignore anything beyond the final MIME boundary' do +        pending do +            # This example raw email has a premature closing boundary for +            # the outer multipart/mixed - my reading of RFC 1521 is that +            # the 
"epilogue" beyond that should be ignored. +            # See https://github.com/mysociety/alaveteli/issues/922 for +            # more discussion. +            mail = get_fixture_mail('nested-attachments-premature-end.email') +            attributes = MailHandler.get_attachment_attributes(mail) +            attributes.length.should == 3 +        end +    end + +    it 'should cope with a missing final MIME boundary' do +        mail = get_fixture_mail('multipart-no-final-boundary.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 1 +        attributes[0][:body].should match(/This is an acknowledgement of your email/) +        attributes[0][:content_type].should == "text/plain" +        attributes[0][:url_part_number].should == 1 +    end + +    it 'should ignore a TNEF attachment with no usable contents' do +        # FIXME: "no usable contents" is slightly misleading.  The +        # attachment in this example email does have usable content in +        # the body of the TNEF attachment, but the invocation of tnef +        # historically used to unpack these attachments doesn't add +        # the --save-body parameter, so that they have been ignored so +        # far.  We probably should include the body from such +        # attachments, but, at the moment, with the pending upgrade to +        # Rails 3, we just want to check that the behaviour is the +        # same as before. 
+        mail = get_fixture_mail('tnef-attachment-empty.email') +        attributes = MailHandler.get_attachment_attributes(mail) +        attributes.length.should == 2 +        # This is the size of the TNEF-encoded attachment; currently, +        # we expect the code just to return this without decoding: +        attributes[1][:body].length.should == 7769 +    end +      it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects          and filenames from an example mail with lots of attachments' do          mail = get_fixture_mail('many-attachments-date-header.email') diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb index e22235298..1d86c26ad 100644 --- a/spec/models/incoming_message_spec.rb +++ b/spec/models/incoming_message_spec.rb @@ -59,12 +59,19 @@ describe IncomingMessage, " when dealing with incoming mail" do          message.subject.should == "Câmara Responde:  Banco de ideias"      end -    it 'should not error on display of a message which has no charset set on the body part and -        is not good utf-8' do +    it 'should deal with GB18030 text even if the charset is missing' do          ir = info_requests(:fancy_dog_request)          receive_incoming_mail('no-part-charset-bad-utf8.email', ir.incoming_email)          message = ir.incoming_messages[1]          message.parse_raw_email! +        message.get_main_body_text_internal.should include("贵公司负责人") +    end + +    it 'should not error on display of a message which has no charset set on the body part and is not good UTF-8' do +        ir = info_requests(:fancy_dog_request) +        receive_incoming_mail('no-part-charset-random-data.email', ir.incoming_email) +        message = ir.incoming_messages[1] +        message.parse_raw_email!          
message.get_main_body_text_internal.should include("The above text was badly encoded")      end @@ -412,6 +419,17 @@ describe IncomingMessage, " when uudecoding bad messages" do          im.get_attachments_for_display.size.should == 1      end +    it "should still work when parsed from the raw email" do +        raw_email = load_file_fixture 'inline-uuencode.email' +        mail = MailHandler.mail_from_raw_email(raw_email) +        im = incoming_messages :useless_incoming_message +        im.stub!(:raw_email).and_return(raw_email) +        im.stub!(:mail).and_return(mail) +        im.parse_raw_email! +        attachments = im.foi_attachments +        attachments.size.should == 2 +    end +      it "should apply censor rules" do          mail = get_fixture_mail('incoming-request-bad-uuencoding.email') diff --git a/spec/support/email_helpers.rb b/spec/support/email_helpers.rb index 7e98c39f6..252b1f137 100644 --- a/spec/support/email_helpers.rb +++ b/spec/support/email_helpers.rb @@ -8,7 +8,7 @@ end  def receive_incoming_mail(email_name, email_to, email_from = 'geraldinequango@localhost')      email_name = file_fixture_name(email_name) -    content = File.read(email_name) +    content = File.open(email_name, 'rb') { |f| f.read }      content.gsub!('EMAIL_TO', email_to)      content.gsub!('EMAIL_FROM', email_from)      RequestMailer.receive(content) diff --git a/spec/support/load_file_fixtures.rb b/spec/support/load_file_fixtures.rb index 08079f654..a54505e99 100644 --- a/spec/support/load_file_fixtures.rb +++ b/spec/support/load_file_fixtures.rb @@ -2,13 +2,7 @@ def file_fixture_name(file_name)      return File.join(RSpec.configuration.fixture_path, "files", file_name)  end -def load_file_fixture(file_name, as_binary=false) +def load_file_fixture(file_name)      file_name = file_fixture_name(file_name) -    content = File.open(file_name, 'r') do |file| -        if as_binary -            file.set_encoding(Encoding::BINARY) if file.respond_to?(:set_encoding) -        
end -        file.read -    end -    return content +    return File.open(file_name, 'rb') { |f| f.read }  end | 
