diff options
| author | Louise Crow <louise.crow@gmail.com> | 2015-06-11 17:27:49 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2015-06-22 17:43:22 +0100 | 
| commit | 19c5e549b8529b10faa36307efceab207afd3224 (patch) | |
| tree | e2823f98304cfb41d583e07b07b5d1f842387c3b /app | |
| parent | 3d8f0cc6b70b55aad20ab4d86642f0e6d605c921 (diff) | |
Add body_as_text, default_body.
The main intent here is to be more explicit about what form you're
getting the attachment body in: valid UTF-8 or binary, and whether it
has been cleaned up in order to be valid UTF-8.
Diffstat (limited to 'app')
| -rw-r--r-- | app/controllers/request_controller.rb | 8 | ||||
| -rw-r--r-- | app/models/foi_attachment.rb | 21 | ||||
| -rw-r--r-- | app/models/incoming_message.rb | 12 | 
3 files changed, 26 insertions, 15 deletions
| diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 45229fd7e..26e3b350c 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -763,12 +763,12 @@ class RequestController < ApplicationController          # Prevent spam to magic request address. Note that the binary          # subsitution method used depends on the content type -        @incoming_message.apply_masks!(@attachment.body, @attachment.content_type) +        body = @attachment.default_body +        @incoming_message.apply_masks!(body, @attachment.content_type)          if response.content_type == 'text/html' -            @attachment.body = ActionController::Base.helpers.sanitize(@attachment.body) +            body = ActionController::Base.helpers.sanitize(body)          end - -        render :text => @attachment.body +        render :text => body      end      def get_attachment_as_html diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 978e11a17..37a9c9827 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -62,19 +62,18 @@ class FoiAttachment < ActiveRecord::Base          }          update_display_size!          @cached_body = d +        if String.method_defined?(:encode) +            @cached_body = @cached_body.force_encoding("ASCII-8BIT") +        end      end +    # raw body, encoded as binary      def body          if @cached_body.nil?              tries = 0              delay = 1              begin -                binary_data = File.open(self.filepath, "rb" ){ |file| file.read } -                if text_type? 
-                    @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string -                else -                    @cached_body = binary_data -                end +                @cached_body = File.open(filepath, "rb" ){ |file| file.read }              rescue Errno::ENOENT                  # we've lost our cached attachments for some reason.  Reparse them.                  if tries > BODY_MAX_TRIES @@ -93,6 +92,16 @@ class FoiAttachment < ActiveRecord::Base          return @cached_body      end +    # body as UTF-8 text, with scrubbing of invalid chars if needed +    def body_as_text +        convert_string_to_utf8(body, 'UTF-8') +    end + +    # for text types, the scrubbed UTF-8 text. For all other types, the +    # raw binary +    def default_body +        text_type? ? body_as_text.string : body +    end      # List of DSN codes taken from RFC 3463      # http://tools.ietf.org/html/rfc3463 diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 7e1567bd1..749f27832 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -374,9 +374,8 @@ class IncomingMessage < ActiveRecord::Base              text = "[ Email has no body, please see attachments ]"              source_charset = "utf-8"          else -            # by default, the body (coming from an foi_attachment) should have been converted to utf-8 -            text = part.body -            source_charset = part.charset +            # whatever kind of attachment it is, get the UTF-8 encoded text +            text = part.body_as_text.string              if part.content_type == 'text/html'                  # e.g. http://www.whatdotheyknow.com/request/35/response/177                  # TODO: This is a bit of a hack as it is calling a @@ -405,8 +404,11 @@ class IncomingMessage < ActiveRecord::Base              end          end -        # If text hasn't been converted, we sanitise it. 
-        text = _sanitize_text(text) +        # Add an annotation if the text had to be scrubbed +        if part.body_as_text.scrubbed? +            text += _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", +                          :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) +        end          # Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)          text = text.gsub(/\r\n/, "\n") | 
