From cc4f37cb7a618dc44c4a63086a6936a55e8d0ed5 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:44:16 -0500 Subject: [PATCH 01/14] Update tests to use the ImportedUrl model --- .../api/v2/api_works_search_spec.rb | 5 ++++- spec/controllers/api/v2/api_works_spec.rb | 3 ++- spec/lib/tasks/opendoors.rake_spec.rb | 11 +++++++---- spec/models/story_parser_spec.rb | 9 ++++++--- spec/models/work_spec.rb | 18 ++++++++++++------ test/fixtures/imported_urls.yml | 15 +++++++++++++++ 6 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 test/fixtures/imported_urls.yml diff --git a/spec/controllers/api/v2/api_works_search_spec.rb b/spec/controllers/api/v2/api_works_search_spec.rb index d2365172510..b8349881fb2 100644 --- a/spec/controllers/api/v2/api_works_search_spec.rb +++ b/spec/controllers/api/v2/api_works_search_spec.rb @@ -11,7 +11,10 @@ def post_search_result(valid_params) describe "API v2 WorksController - Search", type: :request, work_search: true do describe "valid work URL request" do - let!(:work) { create(:work, imported_from_url: "foo") } + let!(:work) { + create(:work) + :work.imported_url = ImportedUrl.new(original: "foo") + } it "returns 200 OK" do valid_params = { works: [{ original_urls: %w(bar foo) }] } diff --git a/spec/controllers/api/v2/api_works_spec.rb b/spec/controllers/api/v2/api_works_spec.rb index a515cc20cba..93a99715769 100644 --- a/spec/controllers/api/v2/api_works_spec.rb +++ b/spec/controllers/api/v2/api_works_spec.rb @@ -514,7 +514,8 @@ end it "work_url_from_external returns the work url when a work is found" do - work1 = create(:work, imported_from_url: "http://foo") + work1 = create(:work) + work1.imported_url = ImportedUrl.new(original: "http://foo") work_url_response = @under_test.instance_eval { find_work_by_import_url("http://foo") } expect(work_url_response[:works].first).to eq work1 end diff --git a/spec/lib/tasks/opendoors.rake_spec.rb b/spec/lib/tasks/opendoors.rake_spec.rb index 380d71bc08c..744d0764a42 100644 --- a/spec/lib/tasks/opendoors.rake_spec.rb +++ b/spec/lib/tasks/opendoors.rake_spec.rb @@ -8,8 +8,11 @@ original_url = "http://another/1" original_url2 = "http://another/2" - let!(:work_with_temp_url) { create(:work, id: id1, imported_from_url: temp_url) } - let!(:work_with_no_url) { create(:work, id: id2, imported_from_url: nil) } + let!(:work_with_temp_url) { + create(:work, id: id1) + :work.imported_url = ImportedUrl.new(original: temp_url) + } + let!(:work_with_no_url) { create(:work, id: id2) } after do File.delete("opendoors_result.txt") if File.exist?("opendoors_result.txt") @@ -19,8 +22,8 @@ it "takes a path to a CSV and updates works" do path = file_fixture("opendoors_import_url_mapping.csv") subject.invoke(path) - expect(Work.find(work_with_temp_url.id).imported_from_url).to eq(original_url) - expect(Work.find(work_with_no_url.id).imported_from_url).to eq(original_url2) + expect(Work.find(work_with_temp_url.id).imported_url.original).to eq(original_url) + expect(Work.find(work_with_no_url.id).imported_url.original).to eq(original_url2) end it "fails if the CSV path is invalid" do expect { subject.invoke("not a path") }.to raise_error Errno::ENOENT diff --git a/spec/models/story_parser_spec.rb b/spec/models/story_parser_spec.rb index a0269a3b2dc..4100cfd2e9c 100644 --- a/spec/models/story_parser_spec.rb +++ b/spec/models/story_parser_spec.rb @@ -117,19 +117,22 @@ class StoryParser let(:location_partial_match) { "http://testme.org/welcome_to_test_vale/12345" } it "should recognise previously imported www. works" do - @work = FactoryBot.create(:work, imported_from_url: location_with_www) + @work = FactoryBot.create(:work) + @work.imported_url = ImportedUrl.new(original: location_with_www) expect { @sp.check_for_previous_import(location_no_www) }.to raise_exception(StoryParser::Error) end it "should recognise previously imported non-www. works" do - @work = FactoryBot.create(:work, imported_from_url: location_no_www) + @work = FactoryBot.create(:work) + @work.imported_url = ImportedUrl.new(original: location_no_www) expect { @sp.check_for_previous_import(location_with_www) }.to raise_exception(StoryParser::Error) end it "should not perform a partial match on work import locations" do - @work = create(:work, imported_from_url: location_partial_match) + @work = create(:work) + @work.imported_url = ImportedUrl.new(original: location_partial_match) expect { @sp.check_for_previous_import("http://testme.org/welcome_to_test_vale/123") }.to_not raise_exception end diff --git a/spec/models/work_spec.rb b/spec/models/work_spec.rb index ad4a102125f..63cdf502aee 100644 --- a/spec/models/work_spec.rb +++ b/spec/models/work_spec.rb @@ -360,7 +360,8 @@ http://lj-site.com/bar/foo?color=blue https://www.lj-site.com/bar/foo?color=blue http://www.foo.com/bar https://www.foo.com/bar].each do |url| - work = create(:work, imported_from_url: url) + work = create(:work) + work.imported_url = ImportedUrl.create(original: url) expect(Work.find_by_url(url)).to eq(work) work.destroy end @@ -372,33 +373,38 @@ "http://efiction-site.com/viewstory.php?sid=123" => "http://efiction-site.com/viewstory.php?sid=456", "http://www.foo.com/i-am-something" => "http://foo.com/i-am-something/else" }.each do |import_url, find_url| - work = create(:work, imported_from_url: import_url) + work = create(:work) + work.imported_url = ImportedUrl.create(original: import_url) expect(Work.find_by_url(find_url)).to_not eq(work) work.destroy end end it "should find works imported with irrelevant query parameters" do - work = create(:work, imported_from_url: "http://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "http://lj-site.com/thing1?style=mine") expect(Work.find_by_url("http://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "finds works imported with HTTP protocol and irrelevant query parameters" do - work = create(:work, imported_from_url: "http://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "http://lj-site.com/thing1?style=mine") expect(Work.find_by_url("https://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "finds works imported with HTTPS protocol and irrelevant query parameters" do - work = create(:work, imported_from_url: "https://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "https://lj-site.com/thing1?style=mine") expect(Work.find_by_url("http://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "gets the work from cache when searching for an imported work by URL" do url = "http://lj-site.com/thing2" - work = create(:work, imported_from_url: url) + work = create(:work) + work.imported_url = ImportedUrl.new(original: url) expect(Rails.cache.read(Work.find_by_url_cache_key(url))).to be_nil expect(Work.find_by_url(url)).to eq(work) expect(Rails.cache.read(Work.find_by_url_cache_key(url))).to eq(work) diff --git a/test/fixtures/imported_urls.yml b/test/fixtures/imported_urls.yml new file mode 100644 index 00000000000..5e08485005f --- /dev/null +++ b/test/fixtures/imported_urls.yml @@ -0,0 +1,15 @@ +--- +imported_url_00001: + created_at: 2008-12-06 22:56:52 Z + updated_at: 2008-12-06 22:56:52 Z + id: 1 + work_id: 82 + original: http://www.trickster.org/llwyden/misc/cracked.html + minimal: + minimal_no_protocol_no_www: + no_www: + with_www: + with_http: + with_https: + encoded: + decoded: From 6e21246f603801c130b35c22c0212aae275c0b17 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:45:17 -0500 Subject: [PATCH 02/14] Update references to imported_from_url where they use the work column --- app/controllers/api/v2/works_controller.rb | 2 +- app/controllers/opendoors/tools_controller.rb | 4 +-- app/models/search/work_indexer.rb | 3 --- app/models/story_parser.rb | 3 +-- app/models/work.rb | 27 +++++++++++-------- lib/tasks/opendoors.rake | 11 +++++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/app/controllers/api/v2/works_controller.rb b/app/controllers/api/v2/works_controller.rb index 3dee15833ef..724a2715cc6 100644 --- a/app/controllers/api/v2/works_controller.rb +++ b/app/controllers/api/v2/works_controller.rb @@ -127,7 +127,7 @@ def find_work_by_import_url(original_url) message = "Please provide the original URL for the work." else # We know the url will be identical no need for a call to find_by_url - works = Work.where(imported_from_url: original_url) + works = Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }) message = if works.empty? "No work has been imported from \"" + original_url + "\"." else diff --git a/app/controllers/opendoors/tools_controller.rb b/app/controllers/opendoors/tools_controller.rb index 1f3cb805a64..60d526474d6 100644 --- a/app/controllers/opendoors/tools_controller.rb +++ b/app/controllers/opendoors/tools_controller.rb @@ -46,12 +46,12 @@ def url_update flash[:error] = ts("The imported-from url you are trying to set doesn't seem valid.") else # check for any other works - works = Work.where(imported_from_url: @imported_from_url) + works = Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }) if works.count > 0 flash[:error] = ts("There is already a work imported from the url %{url}.", url: @imported_from_url) else # ok let's try to update - @work.update_attribute(:imported_from_url, @imported_from_url) + @work.imported_url&.update(original: @imported_from_url) flash[:notice] = "Updated imported-from url for #{@work.title} to #{@imported_from_url}" end end diff --git a/app/models/search/work_indexer.rb b/app/models/search/work_indexer.rb index f7c4c260143..c3a0dbcb6d6 100644 --- a/app/models/search/work_indexer.rb +++ b/app/models/search/work_indexer.rb @@ -55,9 +55,6 @@ def self.mapping title_to_sort_on: { type: "keyword" }, - imported_from_url: { - type: "keyword" - }, work_types: { type: "keyword" }, diff --git a/app/models/story_parser.rb b/app/models/story_parser.rb index c9e869a21f5..74fea824db4 100644 --- a/app/models/story_parser.rb +++ b/app/models/story_parser.rb @@ -300,8 +300,7 @@ def set_work_attributes(work, location = "", options = {}) raise Error, "Work could not be downloaded" if work.nil? @options = options - work.imported_from_url = location # @todo remove this as part of AO3-6979 - work.imported_url = ImportedUrl.new(original: work.imported_from_url) + work.imported_url = ImportedUrl.new(original: location) work.ip_address = options[:ip_address] work.expected_number_of_chapters = work.chapters.length diff --git a/app/models/work.rb b/app/models/work.rb index 9a10e1dd0b6..6084b121f73 100755 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -298,7 +298,7 @@ def expire_caches series.each(&:expire_caches) Work.expire_work_blurb_version(id) - Work.flush_find_by_url_cache unless imported_from_url.blank? + Work.flush_find_by_url_cache unless imported_url == nil end def update_pseud_and_collection_indexes @@ -419,16 +419,21 @@ def self.find_by_url_cache_key(url) def self.find_by_url_uncached(url) url = UrlFormatter.new(url) - Work.where(imported_from_url: url.original).first || - Work.where(imported_from_url: [url.minimal, - url.with_http, url.with_https, - url.no_www, url.with_www, - url.encoded, url.decoded, - url.minimal_no_protocol_no_www]).first || - Work.where("imported_from_url LIKE ? or imported_from_url LIKE ?", - "http://#{url.minimal_no_protocol_no_www}%", - "https://#{url.minimal_no_protocol_no_www}%").select do |w| - work_url = UrlFormatter.new(w.imported_from_url) + + Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }).first || + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where(imported_url: { with_http: url.with_http }).first || + Work.joins(:imported_url).where(imported_url: { with_https: url.with_https }).first || + Work.joins(:imported_url).where(imported_url: { no_www: url.no_www }).first || + Work.joins(:imported_url).where(imported_url: { with_www: url.with_www }).first || + Work.joins(:imported_url).where(imported_url: { encoded: url.encoded }).first || + Work.joins(:imported_url).where(imported_url: { decoded: url.decoded }).first || + Work.joins(:imported_url).where(imported_url: { minimal_no_protocol_no_www: url.minimal_no_protocol_no_www }).first || + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where("imported_urls.original LIKE ? or imported_urls.original LIKE ?", + "http://#{url.minimal_no_protocol_no_www}%", + "https://#{url.minimal_no_protocol_no_www}%").select do |w| + work_url = UrlFormatter.new(w.imported_url&.original) %w[original minimal no_www with_www with_http with_https encoded decoded].any? do |method| work_url.send(method) == url.send(method) end diff --git a/lib/tasks/opendoors.rake b/lib/tasks/opendoors.rake index da057d2303f..3101c8d872c 100644 --- a/lib/tasks/opendoors.rake +++ b/lib/tasks/opendoors.rake @@ -4,12 +4,15 @@ namespace :opendoors do def update_work(row) begin work = Work.find(row["AO3 id"]) - if work&.imported_from_url.blank? || work&.imported_from_url == row["URL Imported From"] - work.imported_from_url = row["Original URL"] + if work&.imported_url.nil? + work.imported_url = ImportedUrl.new() + end + if work&.imported_url.original.blank? || work&.imported_url.original == row["URL Imported From"] + work.imported_url.original = row["Original URL"] work.save! - "#{work.id}\twas updated: its import url is now #{work.imported_from_url}" + "#{work.id}\twas updated: its import url is now #{work.imported_url.original}" else - "#{work.id}\twas not changed: its import url is #{work.imported_from_url}" + "#{work.id}\twas not changed: its import url is #{work.imported_url.original}" end rescue StandardError => e "#{row["AO3 id"]}\twas not changed: #{e}" From 9e3a6785603e77c32772477679ece8d18f2e35b2 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:45:41 -0500 Subject: [PATCH 03/14] Add migration for getting rid of the imported_from_url index and column --- ...60519171051_remove_imported_from_url_field_from_works.rb | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb diff --git a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb new file mode 100644 index 00000000000..89526889411 --- /dev/null +++ b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb @@ -0,0 +1,6 @@ +class RemoveImportedFromUrlFieldFromWorks < ActiveRecord::Migration[8.1] + def change + remove_index :works, :imported_from_url + remove_column :works, :imported_from_url + end +end From 9a4faddac95fda4f77cdebd6ca4de6ce4612bc61 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 12:56:05 -0500 Subject: [PATCH 04/14] Remove backfill script and tests as the column they work with is now gone --- lib/tasks/work_import_urls.rake | 30 -------------------- spec/lib/tasks/work_import_urls.rake_spec.rb | 23 --------------- 2 files changed, 53 deletions(-) delete mode 100644 lib/tasks/work_import_urls.rake delete mode 100644 spec/lib/tasks/work_import_urls.rake_spec.rb diff --git a/lib/tasks/work_import_urls.rake b/lib/tasks/work_import_urls.rake deleted file mode 100644 index 49b5a8b3e54..00000000000 --- a/lib/tasks/work_import_urls.rake +++ /dev/null @@ -1,30 +0,0 @@ -namespace :work_import_urls do - desc "Backfill work_import_urls from works.imported_from_url (run BEFORE removing the column)" - task backfill: :environment do - batch_size = 1000 - - # Query the column directly since this task runs before column removal - scope = Work.where("imported_from_url IS NOT NULL AND imported_from_url != ''") - total = scope.count - processed = 0 - failed = 0 - - puts "Backfilling #{total} work_import_urls records..." - - scope.find_each(batch_size: batch_size) do |work| - next if ImportedUrl.exists?(work_id: work.id) - - begin - work.imported_url = ImportedUrl.create(original: work.imported_from_url) - rescue StandardError => e - puts "------- Error on import for work_id #{work.id}: #{e.inspect}" - failed += 1 - next - end - processed += 1 - puts "Processed #{processed}/#{total}" if (processed % batch_size).zero? - end - - puts "Done! Backfilled #{processed} records, failed #{failed} records." - end -end diff --git a/spec/lib/tasks/work_import_urls.rake_spec.rb b/spec/lib/tasks/work_import_urls.rake_spec.rb deleted file mode 100644 index 77270a1d4ae..00000000000 --- a/spec/lib/tasks/work_import_urls.rake_spec.rb +++ /dev/null @@ -1,23 +0,0 @@ -require "spec_helper" -require "rake" - -describe "rake work_import_urls:backfill" do - it "should import the urls" do - work = create(:work) - work.imported_from_url = "http://www.trickster.org/llwyden/misc/cracked.html" - work.save!(validate: false) - - work_not_migrate = create(:work) - work_not_migrate.imported_from_url = "" - work_not_migrate.save!(validate: false) - - subject.invoke - - expect(ImportedUrl.count).to eq(1) - expect(ImportedUrl.first.original).to eq(work.imported_from_url) - - subject.invoke - - expect(ImportedUrl.count).to eq(1) - end -end From 3d74fff3896f509edfee75d9f7310f72d79a10b2 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 13:23:18 -0500 Subject: [PATCH 05/14] Fix reference to original_url on controller search --- app/controllers/api/v2/works_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/api/v2/works_controller.rb b/app/controllers/api/v2/works_controller.rb index 724a2715cc6..6557d24774d 100644 --- a/app/controllers/api/v2/works_controller.rb +++ b/app/controllers/api/v2/works_controller.rb @@ -127,7 +127,7 @@ def find_work_by_import_url(original_url) message = "Please provide the original URL for the work." else # We know the url will be identical no need for a call to find_by_url - works = Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }) + works = Work.joins(:imported_url).where(imported_url: { original: original_url }) message = if works.empty? "No work has been imported from \"" + original_url + "\"." else From b1d17e0aeabe03a99e939eb73b3310296b927a80 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 13:25:55 -0500 Subject: [PATCH 06/14] Fix the work declaration with the related import url --- spec/controllers/api/v2/api_works_search_spec.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/controllers/api/v2/api_works_search_spec.rb b/spec/controllers/api/v2/api_works_search_spec.rb index b8349881fb2..861e427713e 100644 --- a/spec/controllers/api/v2/api_works_search_spec.rb +++ b/spec/controllers/api/v2/api_works_search_spec.rb @@ -12,8 +12,9 @@ def post_search_result(valid_params) describe "API v2 WorksController - Search", type: :request, work_search: true do describe "valid work URL request" do let!(:work) { - create(:work) - :work.imported_url = ImportedUrl.new(original: "foo") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "foo") + work } it "returns 200 OK" do From b483e7388d65b5bba51ad4136c10c924ad79fdd5 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 13:38:24 -0500 Subject: [PATCH 07/14] Create the imported URL if it doesn't exist on OpenDoors --- app/controllers/opendoors/tools_controller.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/controllers/opendoors/tools_controller.rb b/app/controllers/opendoors/tools_controller.rb index 60d526474d6..c2b3bf2b7f1 100644 --- a/app/controllers/opendoors/tools_controller.rb +++ b/app/controllers/opendoors/tools_controller.rb @@ -51,7 +51,11 @@ def url_update flash[:error] = ts("There is already a work imported from the url %{url}.", url: @imported_from_url) else # ok let's try to update - @work.imported_url&.update(original: @imported_from_url) + if @work.imported_url == nil + @work.imported_url = ImportedUrl.create(original: @imported_from_url) + else + @work.imported_url.update(original: @imported_from_url) + end flash[:notice] = "Updated imported-from url for #{@work.title} to #{@imported_from_url}" end end From 1dd9ce01029f9c7612d38089262307173964cf63 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 13:38:48 -0500 Subject: [PATCH 08/14] Fix ToolsController test to use the new imported url location --- spec/controllers/opendoors/tools_controller_spec.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/spec/controllers/opendoors/tools_controller_spec.rb b/spec/controllers/opendoors/tools_controller_spec.rb index 2cab4b01877..386dda645c4 100644 --- a/spec/controllers/opendoors/tools_controller_spec.rb +++ b/spec/controllers/opendoors/tools_controller_spec.rb @@ -67,7 +67,11 @@ context "with a valid work ID" do let(:work) { create(:work) } - let(:work_with_imported_from_url) { create(:work, imported_from_url: "http://example.org/my-immortal") } + let(:work_with_imported_from_url) { + work = create(:work) + work.imported_url = ImportedUrl.create(original: "http://example.org/my-immortal") + work + } it "redirects to tools if imported-from URL is missing" do post :url_update, params: { work_url: "/works/#{work.id}/" } @@ -80,7 +84,7 @@ end it "redirects to tools if imported-from URL is already used in another work" do - url = work_with_imported_from_url.imported_from_url + url = work_with_imported_from_url.imported_url.original post :url_update, params: { work_url: "/works/#{work.id}/", imported_from_url: url } it_redirects_to_with_error(opendoors_tools_path(imported_from_url: url), "There is already a work imported from the url #{url}.") end @@ -90,7 +94,7 @@ post :url_update, params: { work_url: "http://example.org/works/#{work.id}/", imported_from_url: url } it_redirects_to_with_notice(opendoors_tools_path(imported_from_url: url), "Updated imported-from url for #{work.title} to #{url}") work.reload - expect(work.imported_from_url).to eq(url) + expect(work.imported_url.original).to eq(url) end it "updates work if imported-from URL has non-ASCII characters" do @@ -99,7 +103,7 @@ encoded_url = URI::Parser.new.escape(url) it_redirects_to_with_notice(opendoors_tools_path(imported_from_url: encoded_url), "Updated imported-from url for #{work.title} to #{encoded_url}") work.reload - expect(work.imported_from_url).to eq(encoded_url) + expect(work.imported_url.original).to eq(encoded_url) end end end From 9967c858f39ab5b46b94eb9ba952f692711781d2 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 13:47:18 -0500 Subject: [PATCH 09/14] Rubocop fixes --- app/controllers/opendoors/tools_controller.rb | 2 +- app/models/work.rb | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/app/controllers/opendoors/tools_controller.rb b/app/controllers/opendoors/tools_controller.rb index c2b3bf2b7f1..f87e7b5b461 100644 --- a/app/controllers/opendoors/tools_controller.rb +++ b/app/controllers/opendoors/tools_controller.rb @@ -51,7 +51,7 @@ def url_update flash[:error] = ts("There is already a work imported from the url %{url}.", url: @imported_from_url) else # ok let's try to update - if @work.imported_url == nil + if @work.imported_url.nil? @work.imported_url = ImportedUrl.create(original: @imported_from_url) else @work.imported_url.update(original: @imported_from_url) diff --git a/app/models/work.rb b/app/models/work.rb index 6084b121f73..1300c3561a5 100755 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -298,7 +298,7 @@ def expire_caches series.each(&:expire_caches) Work.expire_work_blurb_version(id) - Work.flush_find_by_url_cache unless imported_url == nil + Work.flush_find_by_url_cache unless imported_url.nil? end def update_pseud_and_collection_indexes @@ -421,16 +421,16 @@ def self.find_by_url_uncached(url) url = UrlFormatter.new(url) Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }).first || - Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || - Work.joins(:imported_url).where(imported_url: { with_http: url.with_http }).first || - Work.joins(:imported_url).where(imported_url: { with_https: url.with_https }).first || - Work.joins(:imported_url).where(imported_url: { no_www: url.no_www }).first || - Work.joins(:imported_url).where(imported_url: { with_www: url.with_www }).first || - Work.joins(:imported_url).where(imported_url: { encoded: url.encoded }).first || - Work.joins(:imported_url).where(imported_url: { decoded: url.decoded }).first || - Work.joins(:imported_url).where(imported_url: { minimal_no_protocol_no_www: url.minimal_no_protocol_no_www }).first || - Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || - Work.joins(:imported_url).where("imported_urls.original LIKE ? or imported_urls.original LIKE ?", + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where(imported_url: { with_http: url.with_http }).first || + Work.joins(:imported_url).where(imported_url: { with_https: url.with_https }).first || + Work.joins(:imported_url).where(imported_url: { no_www: url.no_www }).first || + Work.joins(:imported_url).where(imported_url: { with_www: url.with_www }).first || + Work.joins(:imported_url).where(imported_url: { encoded: url.encoded }).first || + Work.joins(:imported_url).where(imported_url: { decoded: url.decoded }).first || + Work.joins(:imported_url).where(imported_url: { minimal_no_protocol_no_www: url.minimal_no_protocol_no_www }).first || + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where("imported_urls.original LIKE ? or imported_urls.original LIKE ?", "http://#{url.minimal_no_protocol_no_www}%", "https://#{url.minimal_no_protocol_no_www}%").select do |w| work_url = UrlFormatter.new(w.imported_url&.original) From e1dbc25f53ddf169b8ec72d7a69992708955261c Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 14:06:01 -0500 Subject: [PATCH 10/14] More rubocop --- app/models/work.rb | 4 ++-- ...60519171051_remove_imported_from_url_field_from_works.rb | 2 ++ lib/tasks/opendoors.rake | 6 ++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/models/work.rb b/app/models/work.rb index 1300c3561a5..74e7883b316 100755 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -431,8 +431,8 @@ def self.find_by_url_uncached(url) Work.joins(:imported_url).where(imported_url: { minimal_no_protocol_no_www: url.minimal_no_protocol_no_www }).first || Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || Work.joins(:imported_url).where("imported_urls.original LIKE ? or imported_urls.original LIKE ?", - "http://#{url.minimal_no_protocol_no_www}%", - "https://#{url.minimal_no_protocol_no_www}%").select do |w| + "http://#{url.minimal_no_protocol_no_www}%", + "https://#{url.minimal_no_protocol_no_www}%").select do |w| work_url = UrlFormatter.new(w.imported_url&.original) %w[original minimal no_www with_www with_http with_https encoded decoded].any? do |method| work_url.send(method) == url.send(method) diff --git a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb index 89526889411..fd0018257e3 100644 --- a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb +++ b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb @@ -1,4 +1,6 @@ class RemoveImportedFromUrlFieldFromWorks < ActiveRecord::Migration[8.1] + uses_departure! if Rails.env.staging? || Rails.env.production? + def change remove_index :works, :imported_from_url remove_column :works, :imported_from_url diff --git a/lib/tasks/opendoors.rake b/lib/tasks/opendoors.rake index 3101c8d872c..09e02fd4555 100644 --- a/lib/tasks/opendoors.rake +++ b/lib/tasks/opendoors.rake @@ -4,10 +4,8 @@ namespace :opendoors do def update_work(row) begin work = Work.find(row["AO3 id"]) - if work&.imported_url.nil? - work.imported_url = ImportedUrl.new() - end - if work&.imported_url.original.blank? || work&.imported_url.original == row["URL Imported From"] + work.imported_url = ImportedUrl.new if work&.imported_url.nil? + if work&.imported_url&.original.blank? || work&.imported_url&.original == row["URL Imported From"] work.imported_url.original = row["Original URL"] work.save! "#{work.id}\twas updated: its import url is now #{work.imported_url.original}" From 5abc6b74a298c1be8e14ca6d88cb05bb28306097 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 14:51:05 -0500 Subject: [PATCH 11/14] Update migration to mark it irreversible --- ...60519171051_remove_imported_from_url_field_from_works.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb index fd0018257e3..fd5b3e7d226 100644 --- a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb +++ b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb @@ -1,8 +1,12 @@ class RemoveImportedFromUrlFieldFromWorks < ActiveRecord::Migration[8.1] uses_departure! if Rails.env.staging? || Rails.env.production? - def change + def up remove_index :works, :imported_from_url remove_column :works, :imported_from_url end + + def down + raise ActiveRecord::IrreversibleMigration, "This migration cannot be reverted because we can't restore the imported_from_url column data" + end end From aacf4fb1476e92a4262b58f2b040543c09fde0b5 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 14:51:22 -0500 Subject: [PATCH 12/14] Fix spec blocks for rubocop --- spec/controllers/api/v2/api_works_search_spec.rb | 4 ++-- spec/controllers/opendoors/tools_controller_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/controllers/api/v2/api_works_search_spec.rb b/spec/controllers/api/v2/api_works_search_spec.rb index 861e427713e..26a694eec3f 100644 --- a/spec/controllers/api/v2/api_works_search_spec.rb +++ b/spec/controllers/api/v2/api_works_search_spec.rb @@ -11,11 +11,11 @@ def post_search_result(valid_params) describe "API v2 WorksController - Search", type: :request, work_search: true do describe "valid work URL request" do - let!(:work) { + let!(:work) do work = create(:work) work.imported_url = ImportedUrl.create(original: "foo") work - } + end it "returns 200 OK" do valid_params = { works: [{ original_urls: %w(bar foo) }] } diff --git a/spec/controllers/opendoors/tools_controller_spec.rb b/spec/controllers/opendoors/tools_controller_spec.rb index 386dda645c4..8f2418d8694 100644 --- a/spec/controllers/opendoors/tools_controller_spec.rb +++ b/spec/controllers/opendoors/tools_controller_spec.rb @@ -67,11 +67,11 @@ context "with a valid work ID" do let(:work) { create(:work) } - let(:work_with_imported_from_url) { + let!(:work_with_imported_from_url) do work = create(:work) work.imported_url = ImportedUrl.create(original: "http://example.org/my-immortal") work - } + end it "redirects to tools if imported-from URL is missing" do post :url_update, params: { work_url: "/works/#{work.id}/" } From 25f25ac3bc119655f2de404c4bbdc4f106472ab0 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 15:31:28 -0500 Subject: [PATCH 13/14] Update Opendoors to not have a declared class (Rubocop request) and fix issues with new imported url object --- spec/lib/tasks/opendoors.rake_spec.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/spec/lib/tasks/opendoors.rake_spec.rb b/spec/lib/tasks/opendoors.rake_spec.rb index 744d0764a42..92b8297d398 100644 --- a/spec/lib/tasks/opendoors.rake_spec.rb +++ b/spec/lib/tasks/opendoors.rake_spec.rb @@ -9,8 +9,9 @@ original_url2 = "http://another/2" let!(:work_with_temp_url) { - create(:work, id: id1) - :work.imported_url = ImportedUrl.new(original: temp_url) + work_with_temp_url = create(:work, id: id1) + work_with_temp_url.imported_url = ImportedUrl.create(original: temp_url) + work_with_temp_url } let!(:work_with_no_url) { create(:work, id: id2) } @@ -31,8 +32,11 @@ end context "UrlUpdater" do - let(:url_updater) { UrlUpdater.new } - let(:work_with_other_url) { create(:work, imported_from_url: "http://another/1") } + let!(:work_with_other_url) do + work_with_other_url = create(:work) + work_with_other_url.imported_url = ImportedUrl.create(original: "http://another/1") + work_with_other_url + end it "returns an error if the work is not found" do row = { @@ -40,7 +44,7 @@ "Original URL" => "http://another/2", "AO3 id" => 7777773 } - result = url_updater.update_work(row) + result = update_work(row) expect(result).to eq("7777773\twas not changed: Couldn't find Work with 'id'=7777773") end @@ -50,7 +54,7 @@ "Original URL" => "http://another/2", "AO3 id" => work_with_other_url.id } - result = url_updater.update_work(row) + result = update_work(row) expect(result).to match("\\d+\twas not changed: its import url is http://another/1") end @@ -60,7 +64,7 @@ "Original URL" => "http://another/2", "AO3 id" => work_with_no_url.id } - result = url_updater.update_work(row) + result = update_work(row) expect(result).to match("\\d+\twas updated: its import url is now http://another/2") end @@ -70,7 +74,7 @@ "Original URL" => "http://another/2", "AO3 id" => work_with_temp_url.id } - result = url_updater.update_work(row) + result = update_work(row) expect(result).to match("\\d+\twas updated: its import url is now http://another/2") end end From 113eef4e00644601584d1bd02f00634301d652f3 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Sat, 6 Jun 2026 15:33:28 -0500 Subject: [PATCH 14/14] Fix the update_work method to work with the new importedUrl changes --- lib/tasks/opendoors.rake | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/lib/tasks/opendoors.rake b/lib/tasks/opendoors.rake index 09e02fd4555..0f6f4208cd3 100644 --- a/lib/tasks/opendoors.rake +++ b/lib/tasks/opendoors.rake @@ -1,20 +1,19 @@ namespace :opendoors do - - class UrlUpdater - def update_work(row) - begin - work = Work.find(row["AO3 id"]) - work.imported_url = ImportedUrl.new if work&.imported_url.nil? - if work&.imported_url&.original.blank? || work&.imported_url&.original == row["URL Imported From"] - work.imported_url.original = row["Original URL"] - work.save! - "#{work.id}\twas updated: its import url is now #{work.imported_url.original}" - else - "#{work.id}\twas not changed: its import url is #{work.imported_url.original}" - end - rescue StandardError => e - "#{row["AO3 id"]}\twas not changed: #{e}" + def update_work(row) + begin + work = Work.find(row["AO3 id"]) + # ImportedUrl needs an original value, so we use the imported url temporarily and only save if we're actually updating it + work.imported_url = ImportedUrl.new(:original => row["URL Imported From"]) if work.imported_url.nil? + + if work.imported_url.original == row["URL Imported From"] + work.imported_url.original = row["Original URL"] + work.imported_url.save! + "#{work.id}\twas updated: its import url is now #{work.imported_url.original}" + else + "#{work.id}\twas not changed: its import url is #{work.imported_url.original}" end + rescue StandardError => e + "#{row["AO3 id"]}\twas not changed: #{e}" end end @@ -29,10 +28,8 @@ namespace :opendoors do begin f = File.open("opendoors_result.txt", "w") - url_updater = UrlUpdater.new - CSV.foreach(loc, headers: true) do |row| - result = url_updater.update_work(row) + result = update_work(row) puts result f.write(result) end