diff options
| author | Louise Crow <louise.crow@gmail.com> | 2015-05-26 13:47:57 +0100 | 
|---|---|---|
| committer | Louise Crow <louise.crow@gmail.com> | 2015-06-04 12:22:04 +0100 | 
| commit | e64c0657c3d3ed0ca2349fd0344d6b43213c4008 (patch) | |
| tree | fb8ec5ef894bae745043d1e6ad360219ebb838c7 | |
| parent | 592434c4b2b8e58416b3cdb3a66513ac206576d1 (diff) | |
Make sure spelling corrections are encoded as UTF-8
| -rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 5 | ||||
| -rw-r--r-- | spec/lib/acts_as_xapian_spec.rb | 104 | 
2 files changed, 108 insertions, 1 deletion
| diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index f742bae52..9194e8cc4 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -375,7 +375,10 @@ module ActsAsXapian              if correction.empty?                  return nil              end -            return correction +            if correction.respond_to?(:force_encoding) +                correction = correction.force_encoding('UTF-8') +            end +            correction          end          # Return array of models found diff --git a/spec/lib/acts_as_xapian_spec.rb b/spec/lib/acts_as_xapian_spec.rb new file mode 100644 index 000000000..d411bf856 --- /dev/null +++ b/spec/lib/acts_as_xapian_spec.rb @@ -0,0 +1,104 @@ +# -*- encoding : utf-8 -*- +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') + +describe ActsAsXapian::Search do + +    before :all do +        # make sure an index exists +        ActsAsXapian.update_index +    end + +    describe "#words_to_highlight" do + +        it "should return a list of words used in the search" do +            s = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100) +            s.words_to_highlight.should == ["albatross", "word"] +        end + +        it "should remove any operators" do +            s = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100) +            s.words_to_highlight.should == ["albatross", "word"] +        end + +        it "should separate punctuation" do +            s = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100) +            s.words_to_highlight.should == ["the", "doctor", "patient"].sort +        end + +        it "should handle non-ascii characters" do +            s = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100) +            s.words_to_highlight.should == ["adatigénylé", "word"] +        end + +        it 
"should ignore stopwords" do +            s = ActsAsXapian::Search.new([PublicBody], "department of humpadinking", :limit => 100) +            s.words_to_highlight.should_not include('of') +        end + +        it "uses stemming" do +            s = ActsAsXapian::Search.new([PublicBody], 'department of humpadinking', :limit => 100) +            s.words_to_highlight.should == ["depart", "humpadink"] +        end + +        it "doesn't stem proper nouns" do +            s = ActsAsXapian::Search.new([PublicBody], 'department of Humpadinking', :limit => 1) +            s.words_to_highlight.should == ["depart", "humpadinking"] +        end + +        it "includes the original search terms if requested" do +            s = ActsAsXapian::Search.new([PublicBody], 'boring', :limit => 1) +            s.words_to_highlight(:include_original => true).should == ['bore', 'boring'] +        end + +        it "does not return duplicate terms" do +            s = ActsAsXapian::Search.new([PublicBody], 'boring boring', :limit => 1) +            s.words_to_highlight.should == ['bore'] +        end + +        context 'the :regex option' do + +            it 'wraps each words in a regex that matches the full word' do +                expected = [/\b(albatross)\b/iu] +                s = ActsAsXapian::Search.new([PublicBody], 'Albatross', :limit => 1) +                s.words_to_highlight(:regex => true).should == expected +            end + +            it 'wraps each stem in a regex' do +                expected = [/\b(depart)\w*\b/iu] +                s = ActsAsXapian::Search.new([PublicBody], 'department', :limit => 1) +                s.words_to_highlight(:regex => true).should == expected +            end + +        end +    end + +    describe :spelling_correction do + +        before :all do +            @alice = FactoryGirl.create(:public_body, :name => 'alice') +            @bob = FactoryGirl.create(:public_body, :name => 'bôbby') +            ActsAsXapian.update_index +      
  end + +        it 'returns a UTF-8 encoded string' do +            s = ActsAsXapian::Search.new([PublicBody], "alece", :limit => 100) +            s.spelling_correction.should == "alice" +            if s.spelling_correction.respond_to? :encoding +                s.spelling_correction.encoding.to_s.should == 'UTF-8' +            end +        end + +        it 'handles non-ASCII characters' do +            s = ActsAsXapian::Search.new([PublicBody], "bobby", :limit => 100) +            s.spelling_correction.should == "bôbby" +        end + +        after :all do +            @alice.destroy +            @bob.destroy +            ActsAsXapian.update_index +        end + +    end + +end
\ No newline at end of file | 
