-
1
class API::BaseController < ApplicationController
  before_filter :authenticate

  # HTTP Basic authentication for every API endpoint, checked against the
  # market-data API credentials from config.
  def authenticate
    authenticate_or_request_with_http_basic do |username, password|
      # BUG FIX: these were two standalone statements, so the username
      # comparison result was discarded and only the password was checked.
      username == MARKET_DATA_API_CONFIG[:username] &&
        password == MARKET_DATA_API_CONFIG[:password]
    end
  end

  private

  # Standard JSON envelope shared by all API actions:
  # {version: <requested api version>, status: <:ok/:error>, **data}.
  def api_response(status, data = {})
    {version: params[:version], status: status}.merge data
  end
end
-
1
class API::BookVersionCategoriesController < API::BaseController
  # GET /book_version_categories
  # Filters by comma-separated warehouse_book_version_id or category_name;
  # responds with :error when neither filter is supplied.
  def index
    relation =
      if params[:warehouse_book_version_id].present?
        BookVersionCategory.where(warehouse_book_version_id: params[:warehouse_book_version_id].split(','))
      elsif params[:category_name].present?
        BookVersionCategory.where(category_name: params[:category_name].split(','))
      end

    if relation
      render json: api_response(:ok, book_version_categories: BookVersionCategorySerializer.new(relation.to_a))
    else
      render json: api_response(:error)
    end
  end

  # GET /book_version_categories/:id
  def show
    # BUG FIX: .find raises RecordNotFound on a missing id, which made the
    # :error branch below unreachable; find_by(id:) returns nil instead
    # (matching ProductStatsController#show).
    book_version_category = BookVersionCategory.find_by(id: params[:id])
    response = book_version_category.present? ? api_response(:ok, book_version_category: BookVersionCategorySerializer.new(book_version_category)) : api_response(:error)

    render json: response
  end
end
-
1
class API::BookVersionsController < API::BaseController
  # GET /book_versions?tld=...&keys=asin1,isbn13_1,...
  # Looks up book versions by ASIN or ISBN13 within one Amazon TLD
  # (Squeel where{} block syntax).
  def index
    if Utilities::TLDS.include?(params[:tld]) && params[:keys].present?
      relation = WarehouseBookVersion.where(tld: params[:tld]).where{(asin.in my{params[:keys].split(',')}) | (isbn13.in my{params[:keys].split(',')})}

      render json: api_response(:ok, book_versions: BookVersionSerializer.new(relation.to_a))
    else
      render json: api_response(:error)
    end
  end

  # GET /book_versions/:id
  def show
    # BUG FIX: .find raises RecordNotFound on a missing id, so the :error
    # branch below could never run; find_by(id:) returns nil instead.
    book_version = WarehouseBookVersion.find_by(id: params[:id])
    response = book_version.present? ? api_response(:ok, book_version: BookVersionSerializer.new(book_version)) : api_response(:error)

    render json: response
  end
end
-
1
class API::CategoriesController < API::BaseController
  # GET /categories?tld=...&name=a,b,c
  # Lists warehouse categories by name within one Amazon TLD.
  def index
    if Utilities::TLDS.include?(params[:tld]) && params[:name].present?
      relation = WarehouseCategory.where(tld: params[:tld], name: params[:name].split(','))

      render json: api_response(:ok, categories: CategorySerializer.new(relation.to_a))
    else
      render json: api_response(:error)
    end
  end

  # GET /categories/:id
  def show
    # BUG FIX: .find raises RecordNotFound on a missing id, making the
    # :error branch unreachable; find_by(id:) returns nil instead.
    category = WarehouseCategory.find_by(id: params[:id])
    response = category.present? ? api_response(:ok, category: CategorySerializer.new(category)) : api_response(:error)

    render json: response
  end
end
-
1
class API::CategoryStatsController < API::BaseController
  # Params accepted as category selectors in #index.
  SELECTABLE_FIELDS = %i[warehouse_category_id warehouse_category_external_id].freeze

  # GET /category_stats
  # Requires a date (or start_date+end_date), a valid tld, and at least one
  # selector from SELECTABLE_FIELDS. Uses Squeel where{} block syntax.
  def index
    if (params[:date].present? || params[:start_date].present? && params[:end_date].present?) && Utilities::TLDS.include?(params[:tld]) && SELECTABLE_FIELDS.any? {|field| params[field].present?}
      dates = params[:date].present? ? [params[:date].to_date] : (params[:start_date].to_date..params[:end_date].to_date).to_a
      # BUG FIX: the date and tld where{} scopes were standalone statements
      # (missing trailing dots) and were never applied to the relation; they
      # are now chained onto it.
      relation = CategoryStat.select('category_stats.*, warehouse_dates.date as date, warehouse_regions.tld').
        includes(:warehouse_region, :warehouse_date, :warehouse_category).references(:all).
        where{warehouse_dates.date.in dates}.
        where{warehouse_regions.tld == my{params[:tld]}}
      relation = relation.where(warehouse_category_id: params[:warehouse_category_id].split(',')) if params[:warehouse_category_id].present?
      relation = relation.where{warehouse_categories.category_id.in my{params[:warehouse_category_external_id].split(',')}} if params[:warehouse_category_external_id].present?

      render json: api_response(:ok, category_stat: CategoryStatSerializer.new(relation.to_a))
    else
      render json: api_response(:error)
    end
  end

  # GET /category_stats/:id
  def show
    # BUG FIX: .find raises RecordNotFound, so the :error branch was dead;
    # find_by(id:) returns nil instead.
    category_stat = CategoryStat.find_by(id: params[:id])
    response = category_stat.present? ? api_response(:ok, category_stat: CategoryStatSerializer.new(category_stat)) : api_response(:error)

    render json: response
  end
end
-
1
class API::ProductStatsController < API::BaseController
  # GET /product_stats
  # Requires a date (or start_date+end_date), a valid tld, and either
  # comma-separated :keys (asin/isbn13) or :warehouse_book_version_ids.
  def index
    # BUG FIX: :warehouse_book_version_ids was tested for mere key presence;
    # a blank value passed the outer check, failed the inner .present? check,
    # and crashed on params[:keys].split with :keys absent. Use .present?
    # consistently.
    if (params[:date].present? || params[:start_date].present? && params[:end_date].present?) && Utilities::TLDS.include?(params[:tld]) && (params[:keys].present? || params[:warehouse_book_version_ids].present?)
      warehouse_date_ids = params[:date].present? ? WarehouseDate.find_by(date: params[:date].to_date).id : WarehouseDate.where(date: params[:start_date].to_date..params[:end_date].to_date).value_of(:id)
      relation = base_relation.where(warehouse_date_id: warehouse_date_ids).where{warehouse_book_versions.tld == my{params[:tld]}}.references(:all)
      relation = if params[:warehouse_book_version_ids].present?
        relation.where(warehouse_book_version_id: params[:warehouse_book_version_ids].split(','))
      else
        keys_array = params[:keys].split(',')
        relation.where{(warehouse_book_versions.asin.in my{keys_array}) | (warehouse_book_versions.isbn13.in my{keys_array})}.references(:all)
      end

      render json: api_response(:ok, product_stats: ProductStatSerializer.new(relation.to_a))
    else
      render json: api_response(:error)
    end
  end

  # GET /product_stats/:id
  def show
    product_stat = base_relation.find_by(id: params[:id])
    response = product_stat.present? ? api_response(:ok, product_stat: ProductStatSerializer.new(product_stat)) : api_response(:error)

    render json: response
  end

  private

  # Eager-loaded scope shared by both actions.
  def base_relation
    WarehouseStat.includes(:warehouse_book_version, :warehouse_date)
  end
end
-
1
class EnterpriseReportsMailer < ActionMailer::Base
  default from: 'data@vook.com'

  helper :application

  # Emails one or more report hashes to the report's configured recipients
  # plus the global all-reports recipient list for the current environment.
  def basic_report(report_hash, report_config)
    recipients = Array.wrap(report_config[:recipients][Rails.env]) + Array.wrap(AmazeBot.config[:reports][:recipients_all_reports][Rails.env])

    smtp_header = SmtpApiHeader.new
    smtp_header.add_to recipients

    @report_hashes = Array.wrap report_hash
    @report_config = report_config

    subject = "#{report_config[:email][:subject]} (#{l(Time.current.to_date, format: :medium).squish}#{" -- #{Rails.env} environment" unless Rails.env.production?})"

    mail subject: subject, to: recipients, 'X-SMTPAPI' => smtp_header.as_json
  end

  # Daily Booklr status summary, sent only to the global recipient list.
  def booklr_status_report(stat_hash)
    recipients = Array.wrap(AmazeBot.config[:reports][:recipients_all_reports][Rails.env])

    smtp_header = SmtpApiHeader.new
    smtp_header.add_to recipients

    @stat_hash = stat_hash

    subject = "Booklr Daily Status Report (#{l(Time.current.to_date, format: :medium).squish}#{" -- #{Rails.env} environment" unless Rails.env.production?})"

    mail subject: subject, to: recipients, 'X-SMTPAPI' => smtp_header.as_json
  end
end
-
1
class InternalReportsMailer < ActionMailer::Base
  default from: 'data@vook.com'

  helper :application

  # Sends an internal report notification to the report's configured
  # recipients plus the global all-reports recipient list.
  def basic_report(report_id, file_details, report_config)
    recipients = Array.wrap(report_config[:recipients][Rails.env]) + Array.wrap(AmazeBot.config[:reports][:recipients_all_reports][Rails.env])

    smtp_header = SmtpApiHeader.new
    smtp_header.add_to recipients

    @report_id = report_id
    @file_details = file_details
    @report_config = report_config

    subject = "#{report_config[:email][:subject]} (#{l(Time.current.to_date, format: :medium).squish}#{" -- #{Rails.env} environment" unless Rails.env.production?})"

    mail subject: subject, to: recipients, 'X-SMTPAPI' => smtp_header.as_json
  end
end
-
1
class NotificationMailer < ActionMailer::Base
  default from: 'data@vook.com', to: 'data@vook.com'

  # Alerts when the expected number of stats was not queued for scraping.
  def scraper_count_error(message)
    @message = message
    mail subject: "Wrong number of stats queued up", 'X-SMTPAPI' => header.as_json
  end

  # Alerts when a report is blocked by an unexpected row count.
  def report_row_count_error(report_identifier, message)
    @message = message
    mail subject: "[#{Utilities.env}] #{report_identifier} - Report Blocked", 'X-SMTPAPI' => header.as_json
  end

  private

  # BUG FIX: @header used to be assigned in the class body, which creates a
  # class-level instance variable that is nil inside mailer instances — the
  # X-SMTPAPI header was therefore always empty. Build it per message here.
  def header
    @header ||= SmtpApiHeader.new default_params[:to]
  end
end
-
1
class AmazonAPIResponse < ActiveRecord::Base
  # One Amazon Product Advertising API response recorded for a warehouse
  # book version; the individual product rows live in AmazonAPIResponseItem.

  # Attributes

  attr_accessible :warehouse_book_version, :warehouse_book_version_id

  # Associations

  belongs_to :warehouse_book_version, inverse_of: :amazon_api_response
  has_many :amazon_api_response_items, inverse_of: :amazon_api_response, dependent: :destroy
  # Subsets of the response items split by API query type, ordered by the
  # rank Amazon returned them in.
  has_many :amazon_api_lookup_response_items, lambda {where(query_type: 'lookup').order(:response_rank)}, class_name: 'AmazonAPIResponseItem'
  has_many :amazon_api_search_response_items, lambda {where(query_type: 'search').order(:response_rank)}, class_name: 'AmazonAPIResponseItem'

  # Validations

  validates_presence_of :warehouse_book_version_id

  # Miscellaneous

  # Picks the single response item that matches this book version.
  #
  # Returns the item directly when exactly one search (preferred) or lookup
  # item exists. With several items, tries to disambiguate by ASIN — first
  # from tracked-book-version metadata, then (for old-style top-100
  # ingestions) from the isbn13 converted to isbn10. Returns
  # :ambiguous_results when several items remain indistinguishable and
  # :no_results when the response contained no items.
  def matching_response_item
    if amazon_api_search_response_items.count == 1
      return_val = amazon_api_search_response_items.first
      # count == 1 with a nil first should be impossible; log for forensics.
      Rails.logger.tagged('inexplicable') {Rails.logger.info "DUMB (search) #{amazon_api_search_response_items.count} / #{return_val} / #{amazon_api_response_items.count}"} if return_val == nil
      return_val
    elsif amazon_api_lookup_response_items.count == 1
      return_val = amazon_api_lookup_response_items.first
      Rails.logger.tagged('inexplicable') {Rails.logger.info "DUMB (lookup) #{amazon_api_lookup_response_items.count} / #{return_val} / #{amazon_api_response_items.count}"} if return_val == nil
      return_val
    elsif amazon_api_response_items.count > 1
      # Disambiguate via an ASIN stored in tracked book version metadata;
      # metadata[0] is treated as an ASIN when it is exactly 10 chars long.
      TrackedBookVersion.where(warehouse_book_version_id: warehouse_book_version.id).each do |tracked_book_version|
        if tracked_book_version.metadata.present?
          metadata_asin = tracked_book_version.metadata[0] if tracked_book_version.metadata[0].present? && tracked_book_version.metadata[0].length == 10
          if metadata_asin.present?
            amazon_api_response_items.each do |response_item|
              if response_item.asin == metadata_asin
                return response_item
              end
            end
          end
        end
      end

      # For all top 100 ingestions (old style) with an isbn13 and an ambiguous result, select the one that matches
      # the asin to its isbn13 converted to isbn10
      if warehouse_book_version.source.present? && warehouse_book_version.source.include?('amazon-top100-') && warehouse_book_version.isbn13.present?
        amazon_api_response_items.each do |response_item|
          if response_item.asin == ISBN_Tools.isbn13_to_isbn10(warehouse_book_version.isbn13)
            return response_item
          end
        end
      end
      :ambiguous_results
    else
      :no_results
    end
  end
end
-
1
class AmazonAPIResponseItem < ActiveRecord::Base
  # A single product row from an Amazon API response (lookup or search).

  # Attributes

  # The two Amazon API query types an item can originate from.
  QUERY_TYPES = [:lookup, :search].freeze

  attr_accessible :amazon_api_response, :amazon_api_response_id, :asin, :author, :binding, :brand, :creator, :ean, :ean_list_element, :eisbn,
                  :isbn, :item_dimensions_height, :item_dimensions_height_unit, :item_dimensions_length, :item_dimensions_length_unit,
                  :item_dimensions_weight, :item_dimensions_weight_unit, :item_dimensions_width, :item_dimensions_width_unit,
                  :label, :large_image_url, :list_price_amount, :list_price_currency_code, :manufacturer, :medium_image_url,
                  :number_of_pages, :package_dimensions_height, :package_dimensions_height_unit, :package_dimensions_length,
                  :package_dimensions_length_unit, :package_dimensions_weight, :package_dimensions_weight_unit, :package_dimensions_width,
                  :package_dimensions_width_unit, :publication_date, :publisher, :sales_rank, :small_image_url, :studio,
                  :title, :response_rank, :query_type

  # Associations

  belongs_to :amazon_api_response, inverse_of: :amazon_api_response_items, touch: true

  # Validations

  validates_presence_of :amazon_api_response_id, :response_rank, :query_type
  validates_uniqueness_of :response_rank, scope: [:amazon_api_response_id, :query_type]
  validates_inclusion_of :query_type, in: QUERY_TYPES + QUERY_TYPES.collect(&:to_s)

  # Miscellaneous

  # Best-available 13-digit identifier: eisbn first, then ean, then
  # ean_list_element; nil when none is present.
  def isbn13
    return eisbn if eisbn.present?
    return ean if ean.present?
    ean_list_element if ean_list_element.present?
  end

  # Prefers the author field, falling back to creator.
  def author_name
    author.present? ? author : creator
  end

  # Human-readable package dimensions, or nil if any dimension is missing.
  # NOTE(review): values are divided by 100.0, so they appear to be stored
  # as hundredths of an inch/pound — confirm against the ingestion code.
  def physical_details
    if package_dimensions_length.present? && package_dimensions_width.present? && package_dimensions_height.present? && package_dimensions_weight.present?
      "#{package_dimensions_length / 100.0} x #{package_dimensions_width / 100.0} x #{package_dimensions_height / 100.0} inches. #{package_dimensions_weight / 100.0} pounds."
    end
  end

  # Title with embedded newlines flattened to spaces; nil-safe via try.
  def title
    read_attribute(:title).try(:gsub, /\n/, ' ')
  end

  # Accessor for the :binding column; named get_binding because #binding
  # would shadow Kernel#binding.
  def get_binding
    read_attribute(:binding)
  end
end
-
1
class AmazonAuthorPage < AmazonPage
  # Convenience constructor: fetches the author page for an author ASIN on
  # the given Amazon TLD.
  def self.by_asin_and_tld(author_asin, tld)
    new Urls.amazon_author_page(author_asin, tld)
  end

  # Names from the "customers also bought items by" table, or nil when the
  # table is absent or empty.
  def scrape_also_bought_items_by
    author_links = @page.css('#entitySimsTable td a')
    author_links.map(&:text).presence if author_links.present?
  end
end
-
1
class AmazonBestSellersPage < AmazonPage
  # Scraper for an Amazon best-sellers (top 100) category page.

  # Base category breadcrumbs we are willing to scrape.
  ACCEPTABLE_BASE_CATEGORIES = ['Books', 'Kindle Store > Kindle eBooks', 'Kindle Store > Kindle Singles', 'Kindle Store > Books'].freeze

  def self.by_category_id_and_tld_and_base_category_and_page_number(category_id, tld, base_category, page_number)
    new Urls.amazon_book_category_page(category_id, tld, base_category, page_number)
  end

  # True when the breadcrumb name is one of, or nested under, an acceptable
  # base category.
  def self.is_acceptable_amazon_category_name?(category_name)
    category_name.present? && ACCEPTABLE_BASE_CATEGORIES.any? {|acceptable_category| acceptable_category == category_name || category_name.starts_with?("#{acceptable_category} > ")}
  end

  # One hash of scraped fields per best-seller entry on the page.
  # NOTE(review): scrape_best_sellers (unsuffixed) is not defined here;
  # presumably the Page superclass dispatches to the _1/_2 variants below —
  # confirm in Page.
  def best_sellers_stats
    return [] if scrape_best_sellers.blank?

    scrape_best_sellers.collect do |element|
      {rank: get_rank(element),
       days_in_top_100: get_days_in_top_100(element),
       trend: get_trend(element),
       title: get_title(element),
       asin: get_asin(element),
       author: get_author_name(element),
       star_rating: get_star_rating(element),
       rating_count: get_rating_count(element),
       list_price: get_list_price(element),
       price: get_price(element)}
    end
  end

  # Full breadcrumb path, e.g. "Kindle Store > Kindle eBooks > Romance",
  # built from the browse tree with the selected leaf appended.
  def scrape_category_name
    leaf = @page.css('#zg_browseRoot .zg_selected').first
    if leaf.present?
      categories = @page.css('#zg_browseRoot ul > li:first-child *:first-child').to_a
      # Drop the last entry (replaced by the selected leaf below).
      categories = categories[0, categories.size - 1] || []
      categories << leaf
      categories.uniq.collect {|category| category.text.squish}.join(' > ')
    end
  end

  # [{category_name:, category_id:}, ...] for the selected category's
  # children in the browse tree, or nil when there are none.
  def scrape_subcategories
    if @page.css('#zg_browseRoot .zg_selected').present? && @page.css('#zg_browseRoot .zg_selected').first.parent.parent.css('ul').present?
      @page.css('#zg_browseRoot .zg_selected').first.parent.parent.css('li a').each_with_object([]) do |element, arr|
        subcategory_name = "#{scrape_category_name} > #{element.text.squish}"
        subcategory_id = element.attributes['href'].present? ? ScraperUtilities.get_category_id_from_url(element.attributes['href'].text.squish) : nil
        arr << {category_name: subcategory_name, category_id: subcategory_id}
      end
    end
  end

  # Number of pagination pages for this category's top 100.
  def scrape_number_of_pages
    @page.css('.zg_pagination .zg_page').count
  end

  # Best-seller entry nodes — newer page layout.
  def scrape_best_sellers_1
    @page.css('.zg_itemImmersion').presence
  end

  # Best-seller entry nodes — older page layout.
  def scrape_best_sellers_2
    @page.css('.zg_itemRow > :first-child').presence
  end

  # True when Amazon shows the "no best sellers" info message.
  def scrape_no_best_sellers?
    @page.css('.zg_infoMessage').present?
  end

  # Rank digits with the trailing dot stripped, e.g. "12." -> "12".
  def get_rank(element)
    element.css('.zg_rankNumber').first.text.squish.gsub('.', '') if element.css('.zg_rankNumber').first.present?
  end

  # Days the item has spent in the top 100, as a bare number string.
  def get_days_in_top_100(element)
    days_in_top_100_element = element.css('.zg_rankMeta').first || element.css('.zg_daysInList').first
    days_in_top_100_element.text.squish.gsub(/ days? in the top 100/, '') if days_in_top_100_element.present?
  end

  # 'Up'/'Down'/'Steady' based on the trend arrow sprite.
  def get_trend(element)
    if element.css('.zg_arrowUp').present?
      'Up'
    elsif element.css('.zg_arrowDown').present?
      'Down'
    else
      'Steady'
    end
  end

  # Title from the link text, falling back to the compact image's title/alt.
  def get_title(element)
    title_element1 = element.css('.zg_title a').first
    title_element2 = element.css('.zg_itemImage_compact img').first

    (title_element1.try(:text).try(:squish) if title_element1.present?) ||
      (title_element2.attributes['title'].try(:text).try(:squish) if title_element2.present?) ||
      (title_element2.attributes['alt'].try(:text).try(:squish) if title_element2.present?)
  end

  # ASIN extracted from whichever of several known link/attribute locations
  # is present, tried in order of reliability.
  def get_asin(element)
    title_element1 = element.css('.zg_title a').first
    asin_element1 = element.css('.zg_itemImage_compact a').first
    asin_element2 = element.css('.asinReviewsSummary').first
    asin_element3 = element.css('.crAvgStars > a').first
    asin_element4 = asin_element2.present? ? asin_element2.css('a').first : nil

    (ScraperUtilities.extract_asin_from_url(title_element1.attributes['href'].try(:text)) if title_element1.present?) ||
      (ScraperUtilities.extract_asin_from_url(asin_element1.attributes['href'].try(:text)) if asin_element1.present?) ||
      (asin_element2.attributes['name'].try(:text) if asin_element2.present?) ||
      (ScraperUtilities.extract_asin_from_url(asin_element3.attributes['href'].try(:text)) if asin_element3.present?) ||
      (ScraperUtilities.extract_asin_from_url(asin_element4.attributes['href'].try(:text)) if asin_element4.present?)
  end

  # Byline with the leading "by " removed.
  def get_author_name(element)
    element.css('.zg_byline').first.text.gsub(/^by /, '').squish if element.css('.zg_byline').present?
  end

  # Star rating number, e.g. "4.5" from "4.5 out of 5 stars"; skips the
  # Prime badge sprite.
  def get_star_rating(element)
    element.css('.swSprite:not(.s_primeBadge)').first.attributes['title'].text.squish.gsub(' out of 5 stars', '') if element.css('.swSprite:not(.s_primeBadge)').present? && element.css('.swSprite:not(.s_primeBadge)').first.attributes['title'].text.present?
  end

  # Review count with thousands separators removed.
  def get_rating_count(element)
    element.css('.crAvgStars > a').first.text.squish.gsub(',', '') if element.css('.crAvgStars > a').present?
  end

  # List price with '$' and '.' stripped (i.e. price in cents as a string).
  def get_list_price(element)
    element.css('.listprice').first.text.squish.gsub(/\$|\./, '') if element.css('.listprice').present? && element.css('.listprice').first.text.present?
  end

  # Sale price with '$' and '.' stripped (i.e. price in cents as a string).
  def get_price(element)
    element.css('.price').first.text.squish.gsub(/\$|\./, '') if element.css('.price').present? && element.css('.price').first.text.present?
  end
end
-
1
class AmazonCategoryCollection < Mongo::Collection
  # Wraps the 'amazon_categories' collection on the global Mongo connection.
  def initialize(opts = {})
    super 'amazon_categories', $mongodb, opts
  end

  # Resets the collection ahead of a scrape run, then seeds the root
  # 'Kindle Store' categories — they are never scraped directly but are
  # associated with warehouse_stats and warehouse_list_stats.
  def prepare_for_scraping
    drop

    %w[.com .co.uk].each do |tld|
      add_category_details nil, 'Kindle Store', tld, :canonical
    end
  end

  # Inserts one category document.
  def add_category_details(category_id, category_name, tld, status)
    insert category_id: category_id, category_name: category_name, tld: tld, status: status
  end
end
-
1
class AmazonKindleDailyDealsPage < AmazonPage
  # Scraper for the Kindle Daily Deals landing page.
  def initialize(user_agent = 'Windows Mozilla')
    super 'http://www.amazon.com/gp/feature.html?ie=UTF8&docId=1000677541', user_agent
  end

  # If there are multiple Daily Kindle Deals, combine them with the on page deals to get all on page deals
  # Otherwise, return just the on page deals which includes the single Daily Kindle Deal
  def deals
    multiple_kindle_daily_deal.present? ? on_page_daily_deals + multiple_kindle_daily_deal : on_page_daily_deals
  end

  # Returns the Daily Kindle Deals if more than one exists
  def multiple_kindle_daily_deal
    page_banner = @page.css('.amabot_center > .pageBanner').first
    first_deals_element = page_banner.next_element
    # A carousel widget (element with attributes) right after the banner
    # indicates multiple daily deals.
    if first_deals_element.attributes.present?
      scrape_details_from_widget(first_deals_element).each_with_index.collect {|details, index| details.merge daily_deal_type: 'The Kindle Daily Deal', rank: index + 1}
    end
  end

  # Returns all daily deals that have non-search page urls, means there is only one book for that daily deal category
  # Will return the Daily Kindle Deal if it only has one book, will not if it has more than one book
  def on_page_daily_deals
    all_daily_deals.reject {|details| URI.parse(URI.encode(details[:url])).path.start_with? '/s/'}
  end

  # Returns daily deals that have a search page url, means the daily deal category has multiple books
  def search_page_daily_deals
    all_daily_deals.select {|details| URI.parse(URI.encode(details[:url])).path.start_with? '/s/'}
  end

  # Does not include multiple book Daily Kindle Deals, despite name
  #
  # Builds deal hashes by walking two parallel table-row sets: odd rows hold
  # the deal-type headings, even rows hold the deal details; the two passes
  # share the same positional index into `deals`.
  def all_daily_deals
    deals = []

    @page.css('.amabot_center > div > table > tr:nth-child(odd) > td:nth-child(odd) > b').each_with_index do |element, index|
      deals[index] ||= {}
      deals[index][:daily_deal_type] = element.text.squish
    end
    @page.css('.amabot_center > div > table > tr:nth-child(even) > td:not(:empty)').each_with_index do |element, index|
      deals[index] ||= {}
      if element.css('p a').present? && element.css('p a').first.attributes['href'].present?
        deals[index][:asin] = ScraperUtilities.extract_asin_from_url(element.css('p a').first.attributes['href'].value)
        deals[index][:url] = force_absolute_url(element.css('p a').first.attributes['href'].value)
      end
      deals[index][:author_name] = element.css('p').first.text.squish.gsub(/^by /, '') if element.css('p').present?
      # Two cell layouts: with a bold/italic title element vs. without.
      if element.css('b i').present?
        deals[index][:title] = element.css('b i').first.text.squish
        deals[index][:description] = element.css('p')[2].text.squish if element.css('p')[2].present?
        deals[index][:price] = ScraperUtilities.cleanse_price(element.css('.price').text) if element.css('.price').present?
      else
        deals[index][:description] = element.children[1].text.squish if element.children[1].present?
        deals[index][:price] = ScraperUtilities.cleanse_price(element.css('p')[1].text) if element.css('p')[1].present?
      end
    end

    deals
  end
end
-
1
class AmazonKindleMonthlyDealsPage < AmazonPage
  # Scraper for the Kindle Monthly Deals landing page.
  def initialize(user_agent = 'Windows Mozilla')
    super 'http://www.amazon.com/b/ref=amb_link_380698542_1?ie=UTF8&node=3441883011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-1&pf_rd_r=0B9969RWA42TMTQG751R&pf_rd_t=1401&pf_rd_p=1590374862&pf_rd_i=1000706171', user_agent
  end

  # One deal hash per carousel entry, tagged with the carousel heading and
  # a 1-based rank; widgets with no scrapable entries are dropped.
  def deals
    @page.css('.amabot_center .widget').map do |widget|
      heading = widget.css('h2').text.squish
      widget_details = scrape_details_from_widget(widget)
      if widget_details.present?
        widget_details.each_with_index.map do |details, position|
          details.merge daily_deal_type: "Carousel - #{heading}", rank: position + 1
        end
      end
    end.flatten.compact
  end

  # Absolute URLs of the left-nav category links.
  def category_urls
    @page.css('#leftNav .left_nav ul li a').map do |anchor|
      force_absolute_url anchor['href']
    end
  end
end
-
1
class AmazonKindleSelectPage < AmazonPage
  # Scraper for the Kindle Select 25 feature page.
  def initialize(user_agent = 'Windows Mozilla')
    super 'http://www.amazon.com/gp/feature.html?ie=UTF8&docId=1001298091', user_agent
  end

  # All Kindle Select 25 titles: first from the on-page table (odd rows hold
  # rank/title/author, even rows hold link/price, joined by positional
  # index), then the remainder from the s9 carousel widget, ranked after the
  # table entries. Every hash is tagged daily_deal_type: 'Kindle Select 25'.
  def all_select_titles
    titles = []
    @page.css('.amabot_center > div > table:nth-of-type(1) > tr:nth-child(odd) > td:nth-child(odd) > b').each_with_index do |element, index|
      titles[index] ||= {}
      titles[index][:rank] = element.children[0].text.gsub(/\D/, '').to_i if element.children[0].present?
      titles[index][:title] = element.children[1].text.squish if element.children[1].present?
      titles[index][:author_name] = element.children[2].text.squish.gsub(/^by /, '') if element.children[2].present?
    end
    @page.css('.amabot_center > div > table:nth-of-type(1) > tr:nth-child(even) > td:nth-child(odd)').each_with_index do |element, index|
      titles[index] ||= {}
      if element.css('a').present? && element.css('a').first.attributes['href'].present?
        titles[index][:asin] = ScraperUtilities.extract_asin_from_url(element.css('a').first.attributes['href'].value)
        titles[index][:url] = force_absolute_url(element.css('a').first.attributes['href'].value)
      end
      titles[index][:price] = ScraperUtilities.cleanse_price(element.css('.price').text) if element.css('.price').present?
    end

    # Carousel entries continue the ranking after the table entries.
    rank = titles.count

    titles += scrape_details_from_widget(@page.css('.amabot_center .s9Widget').first).each_with_index.collect do |details, index|
      details.merge rank: rank + index + 1
    end

    titles.each {|title| title.merge! daily_deal_type: 'Kindle Select 25'}
  end
end
-
1
class AmazonPage < Page
  # Fetches the page via the Page superclass and remembers the ASIN parsed
  # out of the URL.
  def initialize(url, user_agent = 'Windows Mozilla')
    super url, user_agent
    @asin = ScraperUtilities.extract_asin_from_url url
  end

  # True when Amazon served its bot-detection CAPTCHA instead of content.
  def captcha?
    @page.css('input#captchacharacters').present?
  end

  # Extracts {asin:, url:, title:, author_name:, price:} hashes from an s9
  # carousel widget's entries.
  def scrape_details_from_widget(widget)
    widget.css('div.s9hl').map do |element|
      details = {}

      title_link = element.css('.title').first
      if title_link.present?
        href = title_link.attributes['href'].value
        details[:asin] = ScraperUtilities.extract_asin_from_url(href)
        details[:url] = force_absolute_url(href)
        details[:title] = title_link.attributes['title'].value.squish
      end

      byline = element.css('div.t11').first
      details[:author_name] = byline.text.gsub('›', '').squish if byline.present?

      price_nodes = element.css('.s9Price')
      details[:price] = ScraperUtilities.cleanse_price(price_nodes.text) if price_nodes.present?

      # Fallback title location used by some widget variants.
      fallback_title = element.css('.s9TitleText').first
      details[:title] = fallback_title.text.squish if details[:title].blank? && fallback_title.present?

      details
    end
  end
end
-
1
class AmazonProductPage < AmazonPage
-
1
EMPTY_RATING_HISTOGRAM = {five_star_count: '0', four_star_count: '0', three_star_count: '0', two_star_count: '0', one_star_count: '0'}.with_indifferent_access.freeze
-
1
def self.by_asin_and_tld(asin, tld)
-
100
new Urls.amazon_book_page(asin, tld)
-
end
-
-
1
def book_image_exists?
-
# '#prodImageCell a img' is the old scrape, doesnt seem to exist anymore but here for completeness
-
# '#main-image' is the newer scrape and should cover all other cases
-
# '#imgBlkFront' indicates the new page format and if its the correct style it means the no image image was found
-
3
@page.css('#prodImageCell a img').present? || @page.css('#main-image').present? ||
-
1
(@page.css('#imgBlkFront').present? && @page.css('#imgBlkFront').first['style'] != 'max-width:60px; max-height:40px;')
-
end
-
-
1
def buy_button_exists?
-
# 'input#buyButton' is for 1-click ebook and pre order buy buttons, 'input#buyButton' for non 1 click e book buy buttons
-
# 'span#addToCartSpan' for all physical buy and pre order buttons
-
# "//input[@name='submit.add-to-cart']" indicates the new page type buy button (both normal and pre-order)
-
2
@page.css('input#buyButton').present? || @page.css('button#buyButton').present? ||
-
@page.css('span#addToCartSpan').present? || @page.xpath("//input[@name='submit.add-to-cart']").present?
-
end
-
-
1
def physical_details
-
3
scrape_weight.blank? ? scrape_dimensions : "#{scrape_dimensions}. #{scrape_weight}."
-
end
-
-
1
def scrape_star_rating_distribution_1
-
3
EMPTY_RATING_HISTOGRAM if @page.css('#emptyHistogram').present?
-
end
-
-
1
def scrape_star_rating_distribution_2
-
3
distribution = %w[five four three two one].each_with_object(HashWithIndifferentAccess.new) do |num, hash|
-
15
count_text = @page.css("div.histoRow#{num} div.histoCount").text
-
15
hash["#{num}_star_count"] = count_text.present? ? count_text.gsub(',', '') : '0'
-
end
-
-
3
distribution unless distribution == EMPTY_RATING_HISTOGRAM
-
end
-
-
1
def scrape_star_rating_distribution_3
-
3
@page.css('#histogramTable .a-histogram-row').each_with_object(HashWithIndifferentAccess.new) do |row, hash|
-
15
number = row.css('td:first').text.scan(/\d+/).join('')
-
15
number_as_word = case number
-
when '1'
-
3
'one'
-
when '2'
-
3
'two'
-
when '3'
-
3
'three'
-
when '4'
-
3
'four'
-
when '5'
-
3
'five'
-
end
-
15
hash["#{number_as_word}_star_count"] = row.css('td:last').text.scan(/\d+/).join('')
-
end
-
end
-
-
1
def scrape_likes
-
2
like_count_element = @page.css('span.amazonLikeCount').first
-
2
like_count_element.text.gsub(',', '').strip if like_count_element.present?
-
end
-
-
1
def scrape_sales_rank_1
-
7
sales_rank_element = @page.xpath(".//li[@id='SalesRank']/b").first
-
7
sales_rank_element.next_sibling.text.scan(/\d/).join if sales_rank_element.present? && sales_rank_element.next_sibling.present?
-
end
-
-
1
def scrape_sales_rank_2
-
@page.css('#amazon-sales-rank-detail .a-span9 span span:first').text.split(' in ').first.scan(/\d/).join if @page.css('#amazon-sales-rank-detail .a-span9 span span:first').present?
-
end
-
-
1
def sales_rank_category_element
-
11
@page.xpath(".//li[@id='SalesRank']/b").first.next_sibling if @page.xpath(".//li[@id='SalesRank']/b").first.present?
-
end
-
-
1
def scrape_sales_rank_category_1
-
2
sales_rank_category_element.text.split('in', 2)[1].gsub('(','').squish if sales_rank_category_element.present? && sales_rank_category_element.text.include?('Kindle')
-
end
-
-
1
def scrape_sales_rank_category_2
-
2
sales_rank_category_element.text.split('in')[1].split(' ').first.gsub(',', '') if sales_rank_category_element.present? && sales_rank_category_element.text.exclude?('Kindle')
-
end
-
-
1
def scrape_sales_rank_category_3
-
@page.css('#amazon-sales-rank-detail .a-span9 span span:first').text.scan(/[a-zA-Z]| /).join('').squish.gsub(/^in /, '') if @page.css('#amazon-sales-rank-detail .a-span9 span span:first').present?
-
end
-
-
1
def scrape_sub_categories_and_ranks_1
-
6
@page.css('#SalesRank .zg_hrsr_item').each_with_object([]) do |element, arr|
-
13
rank_element = element.css('.zg_hrsr_rank').first
-
13
category_element = element.css('.zg_hrsr_ladder').first
-
13
category_id_element = element.css('.zg_hrsr_ladder b a').first
-
-
# gsub '#' out for US/UK and 'Nr. ' out for DE
-
arr << {
-
13
rank: (rank_element.text.gsub('#', '').gsub('Nr. ','').squish if rank_element.present?),
-
13
category: (category_element.text.gsub(/\u00A0/, ' ').gsub(/^in /, '').squish if category_element.present?),
-
13
category_id: (ScraperUtilities.get_category_id_from_url(category_id_element.attributes['href'].text.squish) if category_id_element.present?)
-
13
}.with_indifferent_access
-
end
-
end
-
-
1
def scrape_sub_categories_and_ranks_2
-
spans = @page.css('#amazon-sales-rank-detail .a-span9 span span')
-
spans.shift
-
spans.each_with_object([]) do |element, arr|
-
arr << {
-
rank: (element.text.split(' in ').first.scan(/\d/).join),
-
category: (element.text.split(' in ').last),
-
category_id: (ScraperUtilities.get_category_id_from_url(element.css('a').first.attr('href')) if element.css('a').first.present?)
-
}.with_indifferent_access
-
end
-
end
-
-
1
def scrape_book_format_1
-
32
@page.css('#btAsinTitle').text.scan(/\[(.+?)\]/).last.first if @page.css('#btAsinTitle').present? && @page.css('#btAsinTitle').text.scan(/\[(.+?)\]/).present?
-
end
-
-
1
def scrape_book_format_2
-
1
@page.css('h1#title span').first.inner_text.strip if @page.css('h1#title span').present?
-
end
-
-
1
def scrape_book_format_3
-
@page.css('#formats span.a-declarative .a-span4 > span').text.strip if @page.css('#formats span.a-declarative .a-span4 > span').present?
-
end
-
-
1
def scrape_also_boughts
-
@page.css('#purchaseButtonWrapper .shoveler-content li').each_with_object([]) do |element, array|
-
12
title_link = element.css('a.sim-img-title').first
-
12
if title_link.present?
-
12
title_element = element.css('a.sim-img-title span').first
-
12
author_element = element.css('.byline, .shvl-byline').first
-
12
star_count_element = element.css('.auiTestSprite').first
-
12
rating_count_element = element.css('.crAvgStars > a').first
-
12
price_element = element.css('.price').first
-
12
array << {title: ScraperUtilities.cleanse_string((title_element.present? && title_element.attributes['title'].present? ? title_element.attributes['title'].text.strip : title_link.text.strip)),
-
isbn_or_asin: ScraperUtilities.extract_asin_from_url(title_link.attributes['href'].text),
-
447
author: (author_element.text.chars.select{|i| i.valid_encoding?}.join.gsub('by ', '').gsub(/[^0-9a-z ]/i, '').strip if author_element.present?),
-
12
star_count: (star_count_element.attributes['title'].text.strip if star_count_element.present? && star_count_element.attributes['title'].present?),
-
12
rating_count: (rating_count_element.text.strip.gsub(',', '') if rating_count_element.present?),
-
24
price: (ScraperUtilities.cleanse_price(price_element.text.strip) if price_element.present?)}.with_indifferent_access
-
end
-
2
end.presence
-
end
-
-
1
# Scrapes the "What Other Items Do Customers Buy After Viewing This Item?" box.
# Returns an array of indifferent-access hashes (title, isbn_or_asin, author,
# star_count, rating_count, price), or nil when the box is absent or empty (.presence).
def scrape_bought_after_viewing
  bought_after_viewing = []
  # Locate the box by its exact heading text, then walk to the sibling detail rows.
  bought_after_viewing_element = @page.xpath("//*[.='What Other Items Do Customers Buy After Viewing This Item?']").first
  if bought_after_viewing_element.present?
    bought_after_viewing = bought_after_viewing_element.parent.css('.asinDetails').each_with_object([]) do |element, array|
      title_element = element.css('.cpAsinTitle').first
      # Rows without a title element are skipped.
      if title_element.present?
        isbn_or_asin_element = element.css('a').first
        author_element = element.css('.vtp-byline-text').first
        star_count_element = element.css('.swSprite').first
        rating_count_element = element.css('.crAvgStars > a').first
        price_element = element.css('.price').first
        array << {title: ScraperUtilities.cleanse_string(title_element.text.strip),
                  isbn_or_asin: (ScraperUtilities.extract_asin_from_url(isbn_or_asin_element.attributes['href'].text) if isbn_or_asin_element.present?),
                  author: (author_element.text.strip.gsub('by ', '') if author_element.present?),
                  star_count: (star_count_element.attributes['title'].text.strip if star_count_element.present?),
                  rating_count: (rating_count_element.text.strip.gsub(',', '') if rating_count_element.present?),
                  price: (ScraperUtilities.cleanse_price(price_element.text.strip) if price_element.present?)}.with_indifferent_access
      end
    end
  end

  bought_after_viewing.presence
end
-
-
1
# Frequently-bought-together variant 1: the #fbt_item_details list layout.
# Returns an array of indifferent-access hashes (title, author, price, type),
# or nil when no titled entries exist (.presence).
def scrape_frequently_bought_together_1
  @page.css('#fbt_item_details li').each_with_object([]) do |element, array|
    title_element = element.css('label > span, span > a').first
    if title_element.present?
      author_element = element.css('.bxgy-byline-text').first
      price_element = element.css('.bxgy-item-price').first
      type_element = element.css('.bxgy-binding-byline').first
      type_text_element = element.css('.bxgy-byline-text').first
      array << {title: ScraperUtilities.cleanse_string(title_element.text.squish),
                author: (author_element.text.squish.gsub('by ', '') if author_element.present?),
                price: (ScraperUtilities.cleanse_price(price_element.text.squish) if price_element.present?),
                # The binding byline contains both format and byline text; subtract the byline to isolate the format.
                type: (type_element.text.gsub(type_text_element.text, '').squish if type_element.present? && type_text_element.present?)}.with_indifferent_access
    end
  end.presence
end
-
-
1
# Frequently-bought-together variant 2: the single-item #AutoBuyXGetY form layout.
# Returns a one-element array of details, or nil when the form or title is missing.
def scrape_frequently_bought_together_2
  form = @page.css('#AutoBuyXGetY form').first
  title_node = @page.css('.bxgy-text a').first
  return unless form.present? && title_node.present?

  author_node = form.css('.bxgy-byline-text').first
  price_node = form.css('.bxgy-item-price').first
  binding_node = form.css('.bxgy-binding-byline').first
  binding_text_node = form.css('.bxgy-byline-text').first

  details = {title: ScraperUtilities.cleanse_string(title_node.text.squish),
             author: (author_node.text.squish.gsub('by ', '') if author_node.present?),
             price: (ScraperUtilities.cleanse_price(price_node.text.squish) if price_node.present?),
             type: (binding_node.text.gsub(binding_text_node.text, '').squish if binding_node.present? && binding_text_node.present?)}
  [details.with_indifferent_access]
end
-
-
1
# Collects the category breadcrumb strings from the "Look for Similar Items by Category" box.
def scrape_similar_items_by_category
  heading = @page.xpath("//*[.='Look for Similar Items by Category']").first
  return unless heading.present?

  items = heading.parent.css('li')
  items.collect(&:text).compact.presence if items.present?
end
-
-
1
# Collects the numeric category node ids from the "Look for Similar Items by Category" links.
def scrape_similar_items_by_category_id
  heading = @page.xpath("//*[.='Look for Similar Items by Category']").first
  return unless heading.present?

  links = heading.parent.css('li a:last-of-type')
  return unless links.present?

  node_ids = links.collect do |link|
    CGI::parse(URI::parse(link['href']).query)['node'] if link['href'].present?
  end
  node_ids.flatten.compact.presence
end
-
-
1
# Related-format variant 1: per-format winner tbody rows with a currency-bearing price cell.
# Returns {coerced_format => {price:, asin:}} or nil when nothing matched (.presence).
def scrape_related_format_data_1
  contains_currency_condition = ScraperUtilities::CURRENCY_CONDITION
  %w[paperback hardcover mass_market_paperback kindle].each_with_object({}) do |format, hash|
    @page.xpath(".//tbody[@id='#{format}_meta_binding_winner']").xpath("(.//td[@class=' price ' and #{contains_currency_condition}]|.//span[@class='price' and #{contains_currency_condition}])").each do |element|
      tr = element.xpath(".//ancestor::tr[@class='bucketBorderTop'][1]").first
      if tr.present?
        format_title = tr.css('td.tmm_bookTitle').first.text.squish
        #.co.uk DOM doesn't specify a specific mass_market_paperback tbody so we find it in paperback and set the format accordingly
        format_title = ScraperUtilities.coerce_amazon_format(format_title)
        hash[format_title] = {price: ScraperUtilities.cleanse_price(element.text.strip), asin: tr['id'].gsub('tmm_','')}
        # Only the first matching row per format is used.
        break
      end
    end
  end.presence
end
-
-
#TODO Test
-
1
# Related-format variant 2: the #formats/#twister declarative-span layout.
# Keeps only Kindle/Hardcover/Paperback rows whose price cell contains $ or £.
# Returns {coerced_format => {price:, asin:}} or nil when nothing matched (.presence).
def scrape_related_format_data_2
  @page.css('#formats #twister span.a-declarative').each_with_object({}) do |element, hash|
    if element.css('th a').present? && element.css('td')[1].present? && (element.css('td')[1].text.strip.include?('$') || element.css('td')[1].text.strip.include?('£'))
      format_title = element.css('th').text.strip

      acceptable_formats = %w[Kindle Hardcover Paperback]
      if acceptable_formats.any? {|format| format_title.include?(format)}
        format_title = ScraperUtilities.coerce_amazon_format(format_title)

        hash[format_title] = {price: ScraperUtilities.cleanse_price(element.css('td')[1].text.strip),
                              asin: (ScraperUtilities.extract_asin_from_url(element.css('th a').first['href']) if element.css('th a').present?)}
      end
    end
  end.presence
end
-
-
#TODO Test
-
1
# Related-format variant 3: the #formats div.top-level layout.
# Keeps only Kindle/Hardcover/Paperback rows whose price cell contains $ or £.
# Returns {coerced_format => {price:, asin:}} or nil when nothing matched (.presence).
def scrape_related_format_data_3
  @page.css('#formats div.top-level').each_with_object({}) do |element, hash|
    # NOTE(review): guards mix 'td a' (for the link) and 'td' (for the price cell) — presumably intentional for this DOM.
    if element.css('td a').present? && element.css('td a')[1].present? && (element.css('td')[1].text.strip.include?('$') || element.css('td')[1].text.strip.include?('£'))
      format_title = element.css('td a')[1].text.squish
      acceptable_formats = %w[Kindle Hardcover Paperback]
      if acceptable_formats.any? {|format| format_title.include?(format)}
        format_title = ScraperUtilities.coerce_amazon_format(format_title)

        hash[format_title] = {price: ScraperUtilities.cleanse_price(element.css('td')[1].text.strip),
                              asin: (ScraperUtilities.extract_asin_from_url(element.css('td a')[1]['href']) if element.css('td a')[1].present?)}
      end
    end
  end.presence
end
-
-
1
# Average-rating variant 1: the .acrRating text, truncated to "X.Y" (first 3 chars).
def scrape_amazon_average_rating_1
  rating_nodes = @page.css('.reviews .acrRating')
  rating_nodes.text[0, 3] if rating_nodes.present?
end
-
-
1
# Average-rating variant 2: the reviews summary inside the jumpBar div.
def scrape_amazon_average_rating_2
  summary = @page.xpath(".//div[@class='jumpBar']").css('span.asinReviewsSummary').first
  summary.search('.//span').first['title'][0, 3] if summary.present?
end
-
-
1
# Average-rating variant 3: the reviews summary inside the buying div.
def scrape_amazon_average_rating_3
  summary = @page.xpath(".//div[@class='buying']").css('span.asinReviewsSummary').first
  summary.search('.//span').first['title'][0, 3] if summary.present?
end
-
-
1
# Average-rating variant 4: the #avgRating span inside the review container.
def scrape_amazon_average_rating_4
  rating_nodes = @page.css('#reviewContainer #avgRating span')
  rating_nodes.text.squish[0, 3] if rating_nodes.present?
end
-
-
1
# Average-rating variant 5: the linked-histogram element's title attribute.
def scrape_amazon_average_rating_5
  histogram = @page.css('.reviewCountTextLinkedHistogram')
  histogram.first['title'][0, 3] if histogram.present?
end
-
-
1
# Review-count variant 1: third link inside the jumpBar stars summary, commas removed.
def scrape_amazon_review_count_1
  summary = @page.xpath(".//div[@class='jumpBar']").css('span.crAvgStars')
  return if summary.blank?

  count_link = summary.search('.//a')[2]
  count_link.text.split(' ')[0].gsub(',','') unless count_link.nil?
end
-
-
1
# Review-count variant 2: third link inside the buying-div stars summary, commas removed.
def scrape_amazon_review_count_2
  summary = @page.xpath(".//div[@class='buying']").css('span.crAvgStars')
  return if summary.blank?

  count_link = summary.search('.//a')[2]
  count_link.text.split(' ')[0].gsub(',','') unless count_link.nil?
end
-
-
1
# Review-count variant 3: the #summaryStars text with parentheses and commas stripped.
def scrape_amazon_review_count_3
  stars = @page.css('#reviewContainer #summaryStars')
  stars.text.squish.gsub(/\(|\)|,/, '') if stars.present?
end
-
-
1
# Review-count variant 4: digits-only count from #averageCustomerReviews, defaulting to '0'.
def scrape_amazon_review_count_4
  reviews = @page.css('#averageCustomerReviews')
  reviews.text.squish.gsub(/\D/, '').presence || '0' if reviews.present?
end
-
-
1
# NodeSet of priceLarge <b>/<span> elements inside table.product — including the
# variant where the table's class attribute carries a trailing space ('product ').
def large_price_element
  query = [".//table[@class='product']//b[@class='priceLarge']",
           ".//table[@class='product']//span[@class='priceLarge']",
           ".//table[@class='product ']//b[@class='priceLarge']",
           ".//table[@class='product ']//span[@class='priceLarge']"].join(' | ')
  @page.xpath(query)
end
-
-
1
# Price variant 1: only used when the label preceding the last large price mentions Prime.
def scrape_amazon_price_1
  elements = large_price_element
  return unless elements.present?

  label = elements.last.parent.previous_element
  ScraperUtilities.cleanse_price(elements.first.text.strip).presence if label.present? && label.text.strip.include?('Prime')
end
-
-
1
# Price variant 2: rental pages list the rent price first — take the last large price instead.
def scrape_amazon_price_2
  elements = large_price_element
  if elements.count > 1 && elements.first.parent.parent.children.css('.productBlockLabel').first.text.include?('Rent')
    ScraperUtilities.cleanse_price(elements.last.text.strip).presence
  end
end
-
-
1
# Price variant 3: plain large-price text.
def scrape_amazon_price_3
  elements = large_price_element
  ScraperUtilities.cleanse_price(elements.text.strip).presence if elements.present?
end
-
-
1
# Price variant 4: the buy-new section's offer price.
def scrape_amazon_price_4
  offer = @page.css('#buyNewSection span.offer-price')
  ScraperUtilities.cleanse_price(offer.text.strip) if offer.present?
end
-
-
1
# Price variant 5: the rental price grid.
def scrape_amazon_price_5
  rent = @page.css('#rentalPriceBlockGrid .rentPrice')
  ScraperUtilities.cleanse_price(rent.first.text.strip) if rent.present?
end
-
-
1
# Digital list price variant 1: find the "Digital List Price" block label, read its sibling price.
def scrape_digital_list_price_1
  label = @page.css('table.product .productBlockLabel').detect {|node| node.text.strip.gsub(':', '').downcase == 'digital list price'}
  ScraperUtilities.cleanse_price(label.parent.css('.listprice, .listPrice').text.strip) if label.present?
end
-
-
1
# Digital list price variant 2: dedicated .digitalListPrice container.
def scrape_digital_list_price_2
  price_nodes = @page.css('.digitalListPrice .listprice')
  ScraperUtilities.cleanse_price(price_nodes.text.strip) if price_nodes.present?
end
-
-
1
# List price variant 1: find the "Print List Price" block label, read its sibling price.
def scrape_amazon_list_price_1
  label = @page.css('table.product .productBlockLabel').detect {|node| node.text.strip.gsub(':', '').downcase == 'print list price'}
  ScraperUtilities.cleanse_price(label.parent.css('.listprice, .listPrice').text.strip) if label.present?
end
-
-
1
# List price variant 2: span.listprice, unless it belongs to the digital list price block.
def scrape_amazon_list_price_2
  price_nodes = @page.css('table.product span.listprice')
  return unless price_nodes.present?

  ScraperUtilities.cleanse_price(price_nodes.text.strip) unless price_nodes.first.parent['class'] == 'digitalListPrice'
end
-
-
1
# List price variant 3: td.listPrice cell.
def scrape_amazon_list_price_3
  cells = @page.css('table.product td.listPrice')
  ScraperUtilities.cleanse_price(cells.text.strip) if cells.present?
end
-
-
1
# List price variant 4: struck-through price inside the buy box.
def scrape_amazon_list_price_4
  struck = @page.css('#buyBoxInner span.a-text-strike')
  ScraperUtilities.cleanse_price(struck.text.strip) if struck.present?
end
-
-
1
# List price variant 5: td.listprice cell (lowercase class variant).
def scrape_amazon_list_price_5
  cells = @page.css('table.product td.listprice')
  ScraperUtilities.cleanse_price(cells.text.strip) if cells.present?
end
-
-
1
# Scrapes the Kindle author rank box: an overall rank plus per-category sub-ranks.
# Returns an indifferent-access hash ('overall_rank', 'sub_category_N' => {category_id, rank, category_name}),
# nil when nothing was found (.presence), or nil early when the node count is odd
# (the duplicate-halving assumption below would not hold).
def scrape_author_ranks
  author_ranks = {}.with_indifferent_access

  author_ranks['overall_rank'] = @page.css('.kindleAuthorRank .overallRank').first.text[/\d+/] if @page.css('.kindleAuthorRank .overallRank').present?

  if @page.css('.kindleAuthorRank .nodeRank').present?
    # For some reason only the first half of these nodes are real, the rest are just duplicates
    total = @page.css('.kindleAuthorRank .nodeRank').count
    return nil if total.odd?

    @page.css('.kindleAuthorRank .nodeRank').first(total/2).each_with_index do |node, index|
      # category_name: replace non-breaking spaces, collapse whitespace, drop the "#N in " prefix.
      author_ranks["sub_category_#{index + 1}"] = {category_id: (ScraperUtilities.get_category_id_from_url(node.css('a').last['href']) if node.css('a').present?),
                                                   rank: node.text[/\d+/],
                                                   category_name: node.text.gsub(/\u00A0/, ' ').squish.gsub(/#\d+ in /, '')}.with_indifferent_access
    end
  end

  author_ranks.presence
end
-
-
1
# Publication-date variant 1: the hidden input#pubdate value.
def scrape_pub_date_1
  pubdate_input = @page.css('input#pubdate')
  ScraperUtilities.parse_date_string(pubdate_input.first['value']) if pubdate_input.present?
end
-
-
1
# Publication-date variant 2: parenthesized date next to the Publisher/Verlag label.
# Tries every "(...)" group and returns the first that parses as a date; .de pages
# go through the monkey-patched Date.parse_international (day-first German dates).
def scrape_pub_date_2
  # different options because of ze damn Germanz and use the monkey patched Date.parse_international to handle German site
  xpath_condition = ".//b[contains(text(), 'Publisher')] | .//b[contains(text(), 'Verlag')]"

  if @page.css('table td.bucket').xpath(xpath_condition).present?
    # Periods are stripped first so abbreviations don't break the paren scan.
    @page.css('table td.bucket').xpath(xpath_condition).first.parent.text.gsub('.','').scan(/\((.*?)\)/).flatten.collect do |potential_date|
      begin
        if @tld == '.de'
          Date.parse_international potential_date
        else
          Date.parse potential_date
        end
      rescue ArgumentError
        # Non-date parenthesized text is simply skipped.
        nil
      end
    end.compact.first
  end
end
-
-
1
# Publication-date variant 3: parenthesized date in the #nonHeroSection publisher row.
def scrape_pub_date_3
  section = @page.css('#nonHeroSection')
  return unless section.present?

  publisher_span = section.xpath(".//span[contains(text(), 'Publisher')]").first
  return unless publisher_span.present?

  cell_text = publisher_span.parent.parent.css('td')[1].text
  parenthesized = cell_text.scan(/\((.*)\)/).flatten
  ScraperUtilities.parse_date_string(parenthesized.first) if parenthesized.present?
end
-
-
1
# Seller variant 1: bold name inside a "Ships from and sold by" line.
# Keeps the LAST match, mirroring the original full-iteration behavior.
def scrape_sold_by_1
  @page.css('.buying > b').reduce(nil) do |seller, bold|
    bold.parent.text.include?('Ships from and sold by') ? bold.text.strip : seller
  end
end
-
-
1
# Seller variant 2: value cell next to a "Sold by" block label.
# Keeps the LAST match, mirroring the original full-iteration behavior.
def scrape_sold_by_2
  @page.css('.productBlockLabel').reduce(nil) do |seller, label|
    label.text.strip.include?('Sold by') ? label.parent.css('td').last.text.strip : seller
  end
end
-
-
1
# Seller variant 3: "Sold by:" list item inside a bucket's content list.
# Keeps the last matching <li>; returns nil when none match.
def scrape_sold_by_3
  sold_by = nil

  @page.css('td.bucket div.content li').each do |li|
    if li.text.include?('Sold by')
      sold_by = li.text.gsub('Sold by:', '').strip
      # 194 (0xC2) is the lead byte of a UTF-8 non-breaking space; drop the stray leading char if present.
      sold_by[0] = '' if sold_by.bytes.first == 194
    end
  end

  sold_by
end
-
-
1
# Seller variant 4: first line of the #merchant-info block, prefix removed.
def scrape_sold_by_4
  merchant_info = @page.css('#merchant-info')
  return unless merchant_info.present? && merchant_info.text.strip.present?

  merchant_info.text.strip.split("\n").first.strip.gsub('Ships from and sold by ', '')
end
-
-
1
# Publisher name from the details bucket; matches English and German ("Verlag") labels.
def scrape_publisher
  label = @page.css('table td.bucket').xpath(".//b[contains(text(), 'Publisher')] | .//b[contains(text(), 'Verlag')]").first
  return unless label.present?

  # Name sits between ": " and the " (" that opens the date.
  names = label.parent.text.scan(/:\s(.*)\s\(/).flatten
  names.first if names.present?
end
-
-
1
# Product dimensions from the details bucket; matches English and German labels.
# On .com, commas are stripped from the value.
def scrape_dimensions
  xpath_condition = ".//b[contains(text(), 'Product Dimensions')] | .//b[contains(text(), 'Größe und/oder Gewicht')]"
  label = @page.css('table td.bucket').xpath(xpath_condition).first
  return unless label.present?

  value = label.parent.text.split(':').last
  @tld == '.com' ? value.gsub(',', '').strip : value.strip
end
-
-
1
# Shipping weight from the details bucket, with the trailing shipping-rates link text removed.
# On .com, commas are stripped (before the link-text removal, as in the original order).
def scrape_weight
  label = @page.css('table td.bucket').xpath(".//b[contains(text(), 'Shipping Weight')]").first
  return unless label.present?

  value = label.parent.text.split(':').last
  value = value.gsub(',', '') if @tld == '.com'
  value.gsub(' (View shipping rates and policies)', '').strip
end
-
-
1
# Page count: first word of the value in the "... pages" details list item.
def scrape_page_count
  pages_item = @page.css('table td.bucket').xpath(".//li[contains(text(), 'pages')]").first
  pages_item.text.split(':').last.strip.split(' ').first if pages_item.present?
end
-
-
1
# Language from the details bucket; matches English and German ("Sprache") labels.
def scrape_language
  label = @page.css('table td.bucket').xpath(".//b[contains(text(), 'Language')] | .//b[contains(text(), 'Sprache')]").first
  label.parent.text.split(': ').last.strip if label.present?
end
-
-
1
# ISBN-13 variant 1: read the ISBN-13 row directly, hyphens removed.
def scrape_isbn_13_1
  label = @page.css('table td.bucket').xpath(".//b[contains(text(), 'ISBN-13')]").first
  label.parent.text.split(': ').last.gsub('-', '').strip if label.present?
end
-
-
1
# ISBN-13 variant 2: derive it from the ISBN-10 row; nil when the ISBN-10 isn't 10 chars.
def scrape_isbn_13_2
  label = @page.css('table td.bucket').xpath(".//b[contains(text(), 'ISBN-10')]").first
  return unless label.present?

  isbn10 = label.parent.text.split(': ').last.strip
  ISBN_Tools.isbn10_to_isbn13(isbn10) if isbn10.length == 10
end
-
-
1
# Title variant 1: the #btAsinTitle heading with any trailing "[Format]" suffix removed.
# Returns the title String, or nil when the heading is missing or empty.
def scrape_title_1
  heading = @page.css('#btAsinTitle')
  return unless heading.present?

  # FIX: the original second guard was `text.split('[')`, which is always truthy
  # (String#split returns an Array), so an empty heading text crashed on [].first.strip.
  title_segment = heading.text.split('[').first
  title_segment.strip if title_segment
end
-
-
1
# Title variant 2: direct text children of h1#title (excludes nested span text).
def scrape_title_2
  return unless @page.xpath("//h1[@id='title']").present?

  @page.xpath("//h1[@id='title']/text()").text.strip
end
-
-
1
# Author variant 1: the contributorNameID link inside span.author.
def scrape_author_1
  link = @page.css('span.author a.contributorNameID').first
  link.text.strip if link.present?
end
-
-
1
# Author variant 2: the contributorNameTrigger element.
def scrape_author_2
  trigger = @page.css('.contributorNameTrigger').first
  trigger.text.strip if trigger.present?
end
-
-
1
# Author variant 3: first link in the byline-pipe container.
def scrape_author_3
  pipe = @page.css('span.byLinePipe').first
  return unless pipe.present?

  link = pipe.parent.css('a').first
  link.text.strip if link.present?
end
-
-
1
# The product description is served inside a <noscript> fallback block.
def scrape_amazon_description
  @page.css('noscript div').inner_text.strip.squish
end
-
-
1
# Scrapes up to three "most helpful" reviews from #revMHRL.
# Two DOM variants exist, distinguished by the row id: the newer AUI layout
# ('rev-dpReviewsMostHelpfulAUI') and the legacy layout. Each entry yields
# found_useful_count, total_vote_count, star_count, author, date and review text.
def scrape_most_helpful_amazon_reviews
  @page.css('div#revMHRL>div')[0..2].each_with_object([]) do |review_root,most_helpful_reviews|
    author = review_root.css('div span span a').present? ? review_root.css('div span span a').first.inner_text.strip.squish : nil
    date = review_root.css('div span').present? && review_root.css('div span')[3].present? ? ScraperUtilities.parse_date_string(review_root.css('div span')[3].inner_text.strip.squish.split(' on ').last) : nil

    if review_root['id'].strip.include? 'rev-dpReviewsMostHelpfulAUI'
      # AUI layout: "X of Y people found the following review helpful".
      like_stats = review_root.css('div span').present? ? review_root.css('div span').first.inner_text.strip.squish.split(' ') : []
      found_useful_count, total_vote_count = like_stats[0], like_stats[2]
      star_count = review_root.css('div div a').present? && review_root.css('div div a').first['title'].present? ? review_root.css('div div a').first['title'].strip.squish.split(' ').first : nil
      # The body lives in a sibling container whose id mirrors this row's id minus the 'rev' prefix.
      review_data_root = review_root.css("div#revData#{review_root['id'][3..-1]} div").present? ? review_root.css("div#revData#{review_root['id'][3..-1]} div")[0] : nil
      if review_data_root.present? && review_data_root.css('span.MHRHead').present?
        # Column-balanced reviews split the text; the hidden remainder sits in a data attribute
        # ('data-columnbalancing-showfullreview'), trimmed of its JS wrapper via [9..-3].
        review = review_data_root.css('span').length>2 ? review_data_root.css('span')[0..-3].inner_text.strip.squish + ' ' : review_data_root.css('span')[0].inner_text.strip.squish
        review += review_data_root.css('span').length>2 ? review_data_root.css('span')[-2]['data-columnbalancing-showfullreview'][9..-3].strip.squish : review_data_root.css('span')[1]['data-columnbalancing-showfullreview'][9..-3]
      else
        review = review_data_root.present? ? review_data_root.inner_text.strip.squish : nil
      end
    else
      # Legacy layout.
      like_stats = review_root.css('div').present? ? review_root.css('div').first.inner_text.strip.squish.split(' ') : []
      found_useful_count, total_vote_count = like_stats[0], like_stats[2]
      star_count = review_root.css('div span').first['title'].present? ? review_root.css('div span').first['title'].strip.squish.split(' ').first : nil
      review_data_root = review_root.css('div.reviewText>div.drkgry')[0]
      if review_data_root.present? && review_data_root.css('span.MHRHead').present?
        review = review_data_root.css('span.MHRHead').inner_text.strip.squish + ' '
        review += review_data_root.css('span')[1].inner_text.strip.squish
      else
        review = review_data_root.present? ? review_data_root.inner_text.strip.squish : nil
      end

    end
    most_helpful_reviews << {found_useful_count: found_useful_count,
                             total_vote_count: total_vote_count,
                             star_count: star_count,
                             author: author,
                             date: date,
                             review: review}
  end
end
-
-
1
# Collects ISBN-13s for the physical (paperback/hardcover) related-format rows.
# Row ids look like "tmm_<ISBN10>"; only valid ISBN-10s are converted and kept.
def scrape_physical_isbn13s
  @page.xpath(".//div[@class='cBoxInner']//tbody").each_with_object([]) do |tbody, isbn13s|
    next unless tbody['id'].present? && (tbody['id'].include?('paperback') || tbody['id'].include?('hardcover')) && tbody.children.present?

    candidate = tbody.children.first['id'].gsub('tmm_', '')
    isbn13s << ISBN_Tools.isbn10_to_isbn13(candidate) if ISBN_Tools.is_valid?(candidate)
  end
end
-
-
1
# Author link variant 1: direct anchor under div.buying (UK site only).
def scrape_author_tag_1
  return unless @tld == '.co.uk'

  @page.css('div.buying/a').first
end
-
-
1
# Author link variant 2: anchor nested in a span under div.buying.
def scrape_author_tag_2
  @page.css('div.buying/span/a').first
end
-
-
1
# Author link variant 3: first anchor inside span.author.
def scrape_author_tag_3
  @page.css('span.author a').first
end
-
-
1
def scrape_author_tag_4
-
@page.css('span.contributorNameTrigger a').first if @tld == '.co.uk'
-
end
-
-
1
# Author ASIN variant 1: hidden input value next to the contributorNameTrigger span.
def scrape_author_asin_1
  trigger = @page.search(".//span[@class='contributorNameTrigger']").first
  trigger.parent.search('.//input').first['value'] if trigger.present?
end
-
-
1
def scrape_author_asin_2
-
if scrape_author_tag.present?
-
author_url = scrape_author_tag['href']
-
-
if author_url.present? && scrape_author_tag['asin'].present? && Utilities.is_author_asin?(ScraperUtilities.extract_asin_from_url(author_url))
-
ScraperUtilities.extract_asin_from_url(author_url)
-
end
-
end
-
end
-
-
1
# Author ASIN variant 3: extracted from the author link's href, skipping
# session-style URLs (those containing 'UTF8').
def scrape_author_asin_3
  return unless scrape_author_tag.present?

  author_url = scrape_author_tag['href']
  return unless author_url.present? && author_url.exclude?('UTF8')

  if Utilities.is_author_asin?(ScraperUtilities.extract_asin_from_url(author_url))
    ScraperUtilities.extract_asin_from_url(author_url)
  end
end
-
-
1
def scrape_author_asin_4
-
ScraperUtilities.extract_asin_from_url(@page.css('.author_page_link a').first.attributes['href'].value) if @page.css('.author_page_link a').present?
-
end
-
-
1
# Absolute URL of the author's page, or nil when no author link was found.
def scrape_author_page_url
  force_absolute_url(scrape_author_tag['href']) if scrape_author_tag.present?
end
-
-
1
# Display name from the author link, or nil when no author link was found.
def scrape_author_name
  scrape_author_tag.text.strip if scrape_author_tag.present?
end
-
-
# Only tested on a few pages
-
1
def scrape_parent_asin_1
-
1068
if @page.css('script').select{|x| x.text.include? 'twister-media-matrix'}.present?
-
1964
@page.css('script').select{|x| x.text.include? 'twister-media-matrix'}.first.text.split(@asin).last.split(', ')[1].gsub("\"",'') if @page.css('script').select{|x| x.text.include? 'twister-media-matrix'}.first.text.include?(@asin)
-
end
-
end
-
-
# Only tested on a few pages
-
1
def scrape_parent_asin_2
-
115
if @page.css('script').select{|x| x.text.include? 'media-matrix'}.present?
-
81
@page.css('script').select{|x| x.text.include? 'media-matrix'}.first.text.split('tasParentAsin=').last.split('&').first if @page.css('script').select{|x| x.text.include? 'media-matrix'}.first.text.include?('tasParentAsin=')
-
end
-
end
-
-
# TODO Refactor, scrape methods shouldn't create other page objects, there is no good way to handle captchas and retries
-
# This only works on .de for now
-
1
# Builds competitive-title data for this ASIN's related-format box (.de only today).
# Returns {parent_asin:, featured_title:, competitive_titles: [{asin:}...], valid_page:}.
# NOTE: may fetch a second page (the "plus" link) — see the refactor TODO above.
def scrape_competitive_related_format_data
  # We currently only use this on the .de site (it also works on .co.uk) which is why "Mass Market Paperback" maps to
  # "paperback". On the .com site this maps to mass_market_paperback.
  format_to_dom_format_hash = {'Paperback' => 'paperback',
                               'Taschenbuch' => 'paperback',
                               'Mass Market Paperback' => 'paperback',
                               'Hardcover' => 'hardcover',
                               'Gebundene Ausgabe' => 'hardcover',
                               'Kindle Edition' => 'kindle'}
  competitive_details = {parent_asin: scrape_parent_asin, featured_title: nil, competitive_titles: [], valid_page: false}

  # We need to be able to map a book_format to the specific string that represents it in the <tr>'s on Amazon in
  # the related format boxes. We use .include because Kindle Editions come in a few forms but all contain 'Kindle Edition'
  if scrape_book_format.present?
    dom_format = scrape_book_format.exclude?('Kindle') ? format_to_dom_format_hash[scrape_book_format] : format_to_dom_format_hash['Kindle Edition']

    plus_link = nil

    # Can't do anything without a parent_asin and an active row, this usually indicates a lack of related format box or
    # non purchasable title, leave valid_page set to false
    if scrape_parent_asin.present? && @page.css('tr.activeRow').present?
      competitive_details[:valid_page] = true

      # Check if this title is the featured title
      featured = @page.css('tr.activeRow').first.parent['id'].include? '_winner'

      # Get the tr for the featured title whether its this @asin or not and set the featured asin
      featured_tr = nil
      if @page.xpath(".//tbody[@id='#{dom_format}_meta_binding_winner']/tr").present?
        featured_tr = @page.xpath(".//tbody[@id='#{dom_format}_meta_binding_winner']/tr").first
        competitive_details[:featured_title] = featured_tr['id'].gsub('tmm_', '') if featured_tr.present?
      end

      # Get the plus link for the featured title if it exists, if there is no buttonTD td element, there is no plus link
      plus_link = Urls.amazon_related_format_url(scrape_parent_asin, dom_format, '.de') if featured_tr.present? && featured_tr.css('td.tmm_buttonTD').present?

      # If this (@asin) is not a featured title and the featured title is valid, add this to the competitive list
      if !featured && featured_tr.present? && featured_tr.css('td.price').text.include?('EUR')
        competitive_details[:competitive_titles] << {asin: featured_tr['id'].gsub('tmm_', '')}
      end

      # If there is a plus link for this related format, collect competitive titles
      if plus_link.present?
        # Memoized so repeated calls within one scrape don't re-fetch the page.
        @plus_link_page ||= AmazonProductPage.new plus_link

        if @plus_link_page.ok?
          @plus_link_page.dom.css('tr').each do |tr|
            # Only return valid purchasable competitive titles that aren't the @asin
            if tr.css('td.price').text.include? 'EUR'
              asin = tr['id'].gsub('tmm_', '')
              competitive_details[:competitive_titles] << {asin: asin} if asin != @asin
            end
          end
        end
      end
    end
  end

  competitive_details
end
-
-
1
# Like scrape_related_format_data_1 but keyed by the raw (uncoerced) format title
# and matching only td price cells. Returns {format_title => {price:, asin:}};
# note there is no trailing .presence here — an empty Hash is returned as-is.
def scrape_related_formats_with_amazon_price
  contains_currency_condition = ScraperUtilities::CURRENCY_CONDITION
  %w[paperback hardcover mass_market_paperback kindle].each_with_object({}) do |format, hash|
    @page.xpath(".//tbody[@id='#{format}_meta_binding_winner']").xpath("(.//td[@class=' price ' and #{contains_currency_condition}])").each do |element|
      tr = element.xpath(".//ancestor::tr[@class='bucketBorderTop'][1]").first
      if tr.present?
        # 2nd column is format
        format_title = tr.css('td.tmm_bookTitle').first.text.squish.strip
        hash[format_title] = {price: ScraperUtilities.cleanse_price(element.text.strip), asin: tr['id'].gsub('tmm_','')}
        # Only the first matching row per format is used.
        break
      end
    end
  end
end
-
-
# TODO Refactor, scrape methods shouldn't create other page objects, there is no good way to handle captchas and retries
-
1
# Finds a print list price: this page's own list price if present, otherwise the
# first of Mass Market Paperback / Paperback / Hardcover related formats whose
# .com product page yields one. Returns {format:, print_list_price:} or nil.
# NOTE: may fetch other product pages — see the refactor TODO above.
def scrape_cheapest_print_list_price
  return {format: 'Original Title', print_list_price: scrape_amazon_list_price} if scrape_amazon_list_price.present?

  # Find cheapest related format then go to its page to the get the print list price
  related_formats = scrape_related_formats_with_amazon_price.reject {|x| x.include?('Kindle Edition') || x.downcase.include?('bargain')}

  ordered_formats = ['Mass Market Paperback', 'Paperback', 'Hardcover'].each_with_object({}) do |format, hash|
    hash[format] = related_formats[format] if related_formats[format].present?
  end

  # Check each format in order and return if it has a valid print list price
  ordered_formats.each_pair do |format, details|
    # Memoized per-format so repeated calls don't re-fetch pages.
    @ordered_format_pages ||= {}
    @ordered_format_pages[format] ||= AmazonProductPage.by_asin_and_tld details[:asin], '.com'
    if @ordered_format_pages[format].ok?
      list_price = @ordered_format_pages[format].scrape_amazon_list_price
      return {format: format, print_list_price: list_price} if list_price.present?
    end
  end

  nil
end
-
-
1
# Availability variant 1: the #availability element inside the buy box.
def scrape_availability_1
  availability = @page.css('#buybox #availability')
  availability.text.squish if availability.present?
end
-
-
1
# Availability variant 2: the orange availability text in the buying div.
def scrape_availability_2
  availability = @page.css('.buying .availOrange')
  availability.text.squish if availability.present?
end
-
-
1
# True when the Kindle Unlimited badge image is present in the buying area.
def scrape_kindle_unlimited
  @page.css('.buying img[alt="Kindle Unlimited"]').present?
end
-
end
-
1
class AmazonSearchPage < AmazonPage
-
1
def self.by_isbn_or_asin_and_tld(isbn_or_asin, tld)
-
new Urls.amazon_search_page(isbn_or_asin, tld)
-
end
-
-
1
def initialize(url, user_agent = 'Mac FireFox')
-
24
super url, user_agent
-
end
-
-
1
def search_results
-
@page.css('.results .celwidget').collect do |widget|
-
details = {}
-
-
details[:category_name] = @page.css('#breadCrumb').text.squish.gsub('›', '>') if @page.css('#breadCrumb').present?
-
details[:rank] = widget['id'].gsub('result_', '').to_i + 1
-
details[:author_name] = widget.css('.newaps .med.reg').text.squish.scan(/^by (.*?) \(/).flatten.first.squish if widget.css('.newaps .med.reg').present?
-
details[:price] = ScraperUtilities.cleanse_price(widget.css('.rsltL .digp .bld.red.lrg, .rsltL .newp .bld.red.lrg').text.strip) if widget.css('.rsltL .digp .bld.red.lrg, .rsltL .newp .bld.red.lrg').present?
-
details[:title] = widget.css('.newaps a .lrg.bold').text.strip if widget.css('.newaps a .lrg.bold').present?
-
details[:asin] = ScraperUtilities.extract_asin_from_url(widget.css('.image a').first['href']) if widget.css('.image a').present?
-
details[:url] = widget.css('.image a').first['href'] if widget.css('.image a').present?
-
-
details
-
end
-
end
-
-
# Finds the matching url from the list of search results
-
# Assumes the first URL is the match if there is only one result, otherwise consults url_hints, which is an array of asins
-
1
def matching_url_from_search_results(url_hints)
-
4
urls = scrape_search_results_urls
-
4
urls.present? && urls.count == 1 ? urls.first : ScraperUtilities.match_url_with_asins(urls, url_hints)
-
end
-
-
1
def scrape_search_results_urls_1
-
@page.css('.results h3 > a').collect do |anchor|
-
12
anchor.attributes['href'].try(:text)
-
11
end.compact
-
end
-
-
1
def scrape_search_results_urls_2
-
@page.css('.results .image a').collect do |anchor|
-
1
anchor.attributes['href'].try(:text)
-
4
end.compact
-
end
-
-
1
def scrape_search_result_asins_1
-
@page.css('.asinReviewsSummary').collect{|x| x['name']} if @page.css('.asinReviewsSummary').present?
-
end
-
-
1
def scrape_search_result_asins_2
-
anchors = @page.css('.results h3 > a')
-
anchors.collect{|anchor| ScraperUtilities.extract_asin_from_url(anchor.attributes['href'].text)} if anchors.present?
-
end
-
-
1
def scrape_search_result_asins_3
-
anchors = @page.css('.results .image a')
-
anchors.collect{|anchor| ScraperUtilities.extract_asin_from_url(anchor.attributes['href'].text)} if anchors.present?
-
end
-
-
# Does not return the url for current page, only OTHER pagination links.
# Fills in pagination links not visible that exist after the last given
# pagination link, thus only truly works from the first result page for now.
# Returns nil (via #presence) when no pagination urls were found.
def scrape_pagination_urls
  # Get all urls from active, numbered pagination links
  urls = @page.css('#pagn .pagnLink a').collect do |element|
    force_absolute_url element['href']
  end
  # Fill in urls past the active pagination links up to the last disabled,
  # numerical pagination link. Guard on urls.present? so urls.last cannot be
  # nil — previously a '.pagnDisabled' marker with no active '.pagnLink'
  # anchors raised NoMethodError on nil.
  if urls.present? && @page.css('#pagn .pagnDisabled').present?
    max_page_number = @page.css('#pagn .pagnDisabled').text.strip.to_i
    min_page_number = urls.last.scan(/page=(\d+)/).flatten.first.to_i + 1
    # Plain #each: the original used each_with_object with a mismatched block
    # arity and ignored the accumulator, appending to the closed-over array.
    (min_page_number..max_page_number).each do |num|
      urls << urls.last.gsub(/sr_pg_\d+/, "sr_pg_#{num}").gsub(/page=\d+/, "page=#{num}")
    end
  end

  urls.presence
end
-
-
1
# Reads the current page number from the active pagination marker.
# Returns 0 when the marker is absent or its text is non-numeric (String#to_i).
def scrape_search_page_number
  current_marker = @page.css('#pagn .pagnCur')
  current_marker.text.to_i
end
-
end
-
1
# Wraps an Apple (iTunes) top-books RSS/Atom category feed and extracts
# per-entry chart stats. Inherits fetching/parsing behavior from Page.
class AppleTopBooksRssFeed < Page
  # Builds a feed page for the given iTunes category id and feed type.
  def self.by_category_id_and_type(category_id, type)
    new Urls.apple_book_category_feed(category_id, type)
  end

  def initialize(url, user_agent = 'Mac FireFox')
    super url, user_agent
  end

  # Returns an array of hashes, one per feed <entry>, ranked by feed order
  # (rank is 1-based index). Missing fields come back nil from the getters.
  def stats
    @page.xpath('//entry').each_with_index.collect do |element, index|
      {rank: index + 1,
       title: get_title(element),
       author: get_author(element),
       itunes_id: get_itunes_id(element),
       price: get_price(element)}
    end
  end

  private

  # Entry title, truncated to 255 chars; nil when absent or blank.
  def get_title(element)
    element.xpath('name').text.first(255).presence if element.xpath('name').present?
  end

  # Entry author name; nil when absent or blank.
  def get_author(element)
    element.xpath('artist').text.presence if element.xpath('artist').present?
  end

  # iTunes id from the id element's im:id attribute; nil when absent.
  def get_itunes_id(element)
    element.xpath('id').attribute('im:id').value.presence if element.xpath('id').present?
  end

  # Price in integer cents (feed 'amount' is in dollars); nil when absent.
  def get_price(element)
    (element.xpath('price').attribute('amount').value.to_f * 100).to_i if element.xpath('price').present? && element.xpath('price').attribute('amount').value.present?
  end
end
-
1
# Scraper for a single Barnes & Noble book product page. The numbered
# scrape_*_N methods are fallbacks tried in order by Page#method_missing
# (e.g. calling scrape_price tries scrape_price_1/2/3 until one returns
# a present value).
class BnBookPage < BnPage
  def self.by_ean(ean)
    new Urls.bn_book_page(ean)
  end

  # On physical book pages sometimes the physical price and nook price will be listed but it will always get the main
  # physical price. On Nook pages only the Nook price will be listed so the first item will be blank and then the Nook
  # price will be found. Textbook pages are a totally different layout.
  def scrape_price_1
    ScraperUtilities.cleanse_price(@page.css('.product-price .price').text.strip) if @page.css('.product-price .price').present?
  end

  def scrape_price_2
    ScraperUtilities.cleanse_price(@page.css('.nook-price .price').text.strip) if @page.css('.nook-price .price').present?
  end

  def scrape_price_3
    ScraperUtilities.cleanse_price(@page.css('.buy-box-textbook .price').first.text.strip) if @page.css('.buy-box-textbook .price').present?
  end

  def scrape_nook_price
    ScraperUtilities.cleanse_price(@page.css('.nook-price .price').text.strip) if @page.css('.nook-price .price').present?
  end

  def scrape_nook_list_price
    ScraperUtilities.cleanse_price(@page.css('.nook-price .list').text.strip) if @page.css('.nook-price .list').present?
  end

  def scrape_list_price_1
    ScraperUtilities.cleanse_price(@page.css('.list').first.text.strip) if @page.css('.list').present?
  end

  def scrape_list_price_2
    # textbook style List Price scrape
    @page.css('.product-details-textbook ul li').each do |li|
      if li.text.include? 'List Price'
        return li.css('.value').present? ? ScraperUtilities.cleanse_price(li.css('.value').text.strip) : nil
      end
    end

    nil
  end

  # Decodes the star rating from the stars-large CSS class name, e.g. a class
  # token like "r4h" becomes "4.5" ('r' stripped, 'h' = half star → ".5").
  # Only runs when the reviews section is on the page.
  def scrape_average_rating
    @page.css('span.starDisplay > span.stars-large').attr('class').value.split(' ').second.gsub('r', '').gsub('h', '.5') if @page.css('div.reviews-share').present? && @page.css('span.starDisplay').present?
  end

  # First run of digits in the ratings total label, as a string; nil otherwise.
  def scrape_rating_count
    @page.css('span.starDisplay > span.total').text.scan(/\d+/).first if @page.css('div.reviews-share').present? && @page.css('span.starDisplay').present?
  end

  # BN shows a single combined count; reuse the rating count.
  def scrape_review_count
    scrape_rating_count
  end

  # Scans the product-details list for the 'Sales rank' row and returns its
  # first digit run with thousands separators removed; nil when not found.
  def scrape_sales_rank
    product_details_elements = @page.css('div.product-details ul li')
    if @page.css('div.reviews-share').present? && product_details_elements.present?
      product_details_elements.each do |li|
        return li.text.gsub(',', '').scan(/\d+/).first if li.text.include? 'Sales rank'
      end
    end

    nil
  end

  # Finds the NOOK entry among the alternate-format links and returns
  # {'NOOK Book' => {price:, ean:}} (either value may be nil); nil when no
  # NOOK format is listed.
  def scrape_related_format_data
    @page.css('li.format').each do |element|
      if element.text.include? 'NOOK'
        nook_price_element = element.css('div.bn-price a').first
        nook_price = nook_price_element.present? ? ScraperUtilities.cleanse_price(nook_price_element.text.strip) : nil
        nook_ean = nook_price_element.present? && nook_price_element['data-bn-rel'].present? ? ScraperUtilities.parse_ean_from_bn_url(nook_price_element['data-bn-rel']) : nil
        return {'NOOK Book' => {price: nook_price, ean: nook_ean}}
      end
    end

    nil
  end

  # Collects "customers also bought" tiles into an array of hashes
  # (title/ean/author/price); returns nil (via #presence) when empty.
  def scrape_also_boughts
    @page.css('.display-tile-item').each_with_object([]) do |element, array|
      title_element = element.css('a > img').first
      if title_element.present?
        author_element = element.css('.contributor').first
        price_element = element.css('.price').first
        array << {title: title_element.attributes['alt'].text.squish,
                  ean: (element['data-bn-ean']),
                  author: (author_element.text.squish if author_element.present?),
                  price: (ScraperUtilities.cleanse_price(price_element.text.strip) if price_element.present?)}
      end
    end.presence
  end

  # EAN of the NOOK edition, parsed from the format link's parent href;
  # nil when no NOOK format entry or no usable href exists.
  def scrape_nook_ean
    @page.css('li.format').each do |element|
      if element.text.include? 'NOOK'
        return element.css('span.name').present? && element.css('span.name').first.parent['href'].present? ? ScraperUtilities.parse_ean_from_bn_url(element.css('span.name').first.parent['href']) : nil
      end
    end

    nil
  end

  #TODO Unused, delete?
  #def self.determine_ean_and_format_from_li(li)
  #  div = li.css('div')
  #  ean = div.first['data-bn-ean']
  #  format = div.css('div.price-format > a > span.format').text
  #
  #  if format.include? 'BN.com'
  #    # If you see this in traversal it means the book is broken/redirection loop
  #    return ean,nil
  #  end
  #
  #  return ean,format
  #end
end
-
1
# A Barnes & Noble category listing page, fetched through a category-specific
# HTTP helper rather than Page's default fetcher.
class BnCategoryPage < BnPage
  def self.by_category_id_and_book_format_and_page_number(category_id, book_format, page_number)
    new Urls.bn_category_page(category_id, book_format, page_number)
  end

  def initialize(url, user_agent = 'Mac FireFox')
    # Pre-assigning @page matters: Page#initialize uses `@page ||= ...`,
    # so setting it here makes super skip its own HTTP fetch and keep this
    # category-specific result.
    @page = HttpHelper.get_bn_category_page_html url, user_agent
    super url, user_agent
  end
end
-
1
# Scraper for BN's "NOOK Daily Find" promo page: one featured deal book plus
# several carousels of related deal titles.
class BnNookDailyFindPage < BnPage
  def initialize(user_agent = 'Mac FireFox')
    super 'http://www.barnesandnoble.com/u/ebook-nook-daily-find-bargain-deal/379003102/', user_agent
  end

  # Scrapes the single featured deal. Returns a hash with :daily_deal_type,
  # :title, :author_name, :price and either :isbn (valid ISBN) or :bn_id.
  def scrape_daily_find_book
    details = {}
    details[:daily_deal_type] = @page.css('.st-single-product .header').text.squish if @page.css('.st-single-product .header').present?
    details[:title] = @page.css('.st-single-product .body .title').text.squish
    details[:author_name] = @page.css('.st-single-product .body .contributor').text.squish
    details[:price] = ScraperUtilities.cleanse_price(@page.css('.st-single-product .body .price').text)
    ean = @page.css('.st-single-product .body .title a').present? ? ScraperUtilities.parse_ean_from_bn_url(@page.css('.st-single-product .body .title a').first.attributes['href'].value) : nil
    # EANs that validate as ISBNs are stored as :isbn; anything else
    # (including nil) falls through to :bn_id.
    if ISBN_Tools.is_valid? ean
      details[:isbn] = ean
    else
      details[:bn_id] = ean
    end

    details
  end

  # Scrapes every carousel on the page into a flat array of detail hashes.
  # :rank is the 1-based position within each carousel (restarts per carousel);
  # :daily_deal_type is the carousel's header text.
  def scrape_daily_find_carousels
    daily_finds = []
    @page.css('.st-carousel').each do |carousel|
      carousel.css('.product-root-node').each_with_index do |product_container, index|
        details = {}
        details[:daily_deal_type] = carousel.css('.header h3').text.squish
        details[:rank] = index + 1
        # Prefer the title embedded in the cover image's alt text
        # ("Title: ..., Author: ..."); fall back to the visible title element.
        details[:title] = product_container.css('.linked-image img').first.attributes['alt'].value.scan(/Title: (.*?), Author:/).flatten.first.squish if product_container.css('.linked-image img').present? && product_container.css('.linked-image img').first.attributes['alt'].value.present?
        details[:title] = product_container.css('.product-title').text.squish if details[:title].blank?
        details[:author_name] = product_container.css('.contributers-line').text.squish
        ean = product_container.css('.product-title a').present? ? ScraperUtilities.parse_ean_from_bn_url(product_container.css('.product-title a').first.attributes['href'].value) : nil
        if ISBN_Tools.is_valid? ean
          details[:isbn] = ean
        else
          details[:bn_id] = ean
        end

        daily_finds << details
      end
    end

    daily_finds
  end
end
-
1
# Scraper for BN's paginated "NOOK Books under $2.99" listing. The listing is
# exposed in fixed 30-item pages addressed by a start offset.
class BnNookUnder299 < BnPage
  # Valid ?start= offsets for the four listing pages.
  START_NUMBERS = %w[1 31 61 91].freeze
  def self.by_start_number(num)
    raise 'Invalid Starting Number' unless START_NUMBERS.include? num.to_s

    new "http://www.barnesandnoble.com/u/ebooks-nook-books-bargain-deal-3-or-less/379003858?start=#{num}"
  end

  # Scrapes every result on this page into an array of detail hashes.
  # :rank is the absolute position derived from the ?start= offset plus the
  # 0-based index on the page.
  def book_details
    book_details = []
    rank = query_parameters['start'].first.to_i
    @page.css('.result').each_with_index do |element, index|
      details = {}
      details[:daily_deal_type] = 'NOOK Books Under $2.99'
      details[:rank] = rank + index
      # Prefer the title embedded in the cover image alt text; fall back to
      # the visible title link.
      details[:title] = element.css('.image-block img').first.attributes['alt'].value.scan(/Title: (.*?), Author:/).flatten.first.squish if element.css('.image-block img').present? && element.css('.image-block img').first.attributes['alt'].value.present?
      details[:title] = element.css('.title a').first.text.squish if details[:title].blank? && element.css('.title a').present?
      details[:author] = element.css('.contributor a').first.text.squish if element.css('.contributor a').present?
      # Strips '$' and '.' so "$2.99" becomes "299" (price in cents as string).
      details[:price] = element.css('.pricing.bn-price strong').first.text.squish.gsub(/\$|\./, '') if element.css('.pricing.bn-price strong').present?
      details[:url] = force_absolute_url(element.css('.title a').first.attributes['href'].text) if element.css('.title a').present?
      ean = details[:url].present? ? ScraperUtilities.parse_ean_from_bn_url(details[:url]) : nil
      # Valid ISBNs go under :isbn; anything else (including nil) under :bn_id.
      if ISBN_Tools.is_valid? ean
        details[:isbn] = ean
      else
        details[:bn_id] = ean
      end
      book_details << details
    end

    book_details
  end
end
-
1
# Base page for all Barnes & Noble scrapes. Swallows persistent-HTTP
# connection failures during the initial fetch and records them so callers
# can check net_persistent_error? rather than rescuing themselves.
class BnPage < Page
  def initialize(url, user_agent = 'Mac FireFox')
    super url, user_agent
  rescue Net::HTTP::Persistent::Error
    @net_persistent_error = true
    {}
  end

  # True when the page fetch failed with Net::HTTP::Persistent::Error;
  # nil (falsy) otherwise.
  def net_persistent_error?
    @net_persistent_error
  end
end
-
1
# A Barnes & Noble search results page, used to resolve a book (by ISBN or by
# title/author/format) to a BN EAN.
class BnSearchPage < BnPage
  def self.by_isbn(isbn)
    new Urls.bn_search_page(isbn)
  end

  def self.by_title_and_author_and_book_format(title, author, book_format)
    new Urls.bn_search_page_by_details(title, author, book_format), 'Mac FireFox', book_format
  end

  def initialize(url, user_agent = 'Mac FireFox', book_format = '')
    super url, user_agent
    # Map our book_format vocabulary to BN's format labels; a Kindle edition
    # corresponds to BN's NOOK format. Anything else is assumed Hardcover.
    @search_category = if book_format.include? 'Paperback'
                         'Paperback'
                       elsif book_format.include? 'Kindle'
                         'NOOK'
                       else
                         'Hardcover'
                       end
  end

  # Returns the EAN of the first search-result block whose pub date matches
  # pub_date and which offers a format matching @search_category; nil when no
  # block matches.
  def ean_for(pub_date)
    # check if any search results show same pub date as book version, then check for a book of your type in that block
    if @page.search(".//li[@id='search-result0']/div/section/p/a").present?
      @page.search(".//span[@class='date']").each do |date|
        begin
          if Date.strptime(date.text, '(%m/%d/%Y)') == pub_date
            # Traverse to the top of the block and search if block has a book of the correct type to get the ean
            date.parent.parent.parent.parent.search(".//table[@class='displayed-formats']//tr//a").each do |a|
              #scrape url and get EAN which is ISBN or BN ID (URLs are in 2 possible formats)
              return ScraperUtilities.parse_ean_from_bn_url(a['data-bn-rel']) if a.attr('data-bntrack').include?(@search_category)
            end
          end
        rescue ArgumentError => e
          # Date.strptime raises ArgumentError on malformed date text; log and
          # keep scanning the remaining date spans.
          Rails.logger.tagged('cleanup') {Rails.logger.info "Pub date in date bound search flow returned malformed date string, can't parse date: #{e.message}"}
        end
      end
    end

    nil
  end

  # True when BN rendered its explicit "no results" message.
  def no_results?
    @page.css('div.search-noresults-message').present? && @page.css('div.search-noresults-message').text.strip.include?('Sorry, we could not find what you were looking for.')
  end
end
-
1
# Join model linking a warehouse book version to a named category within a
# region. Category names are unique per book version.
class BookVersionCategory < ActiveRecord::Base
  # Attributes

  attr_accessible :warehouse_book_version, :warehouse_book_version_id, :warehouse_category, :warehouse_category_id,
                  :warehouse_region, :warehouse_region_id, :category_name

  # Associations

  belongs_to :warehouse_book_version, inverse_of: :book_version_categories
  belongs_to :warehouse_category, inverse_of: :book_version_categories
  belongs_to :warehouse_region, inverse_of: :book_version_categories

  # Validations

  # warehouse_category_id is intentionally not required here, unlike the
  # other foreign keys.
  validates_presence_of :warehouse_book_version_id, :warehouse_region_id, :category_name
  validates_uniqueness_of :category_name, scope: :warehouse_book_version_id
end
-
1
# Records per-day scrape problems for a book version (book missing from
# search, missing image/price/buy button, invalid Apple data, etc.) as a set
# of boolean flags.
class BookVersionException < ActiveRecord::Base
  # Attributes

  attr_accessible :warehouse_book_version, :warehouse_book_version_id, :warehouse_date, :warehouse_date_id,
                  :amazon_not_found_in_search, :amazon_no_image, :amazon_no_buy_button, :amazon_no_price,
                  :bn_not_found_in_search, :no_isbn, :amazon_ambiguous_result, :apple_invalid

  # Associations

  belongs_to :warehouse_book_version, inverse_of: :book_version_exceptions
  belongs_to :warehouse_date, inverse_of: :book_version_exceptions

  # Validations

  validates_presence_of :warehouse_book_version_id
  validates_presence_of :warehouse_date_id
end
-
1
# Mongo collection tracking the current status of each book version, keyed by
# warehouse_book_version_id.
class BookVersionStatusCollection < Mongo::Collection
  # Binds to the 'book_version_statuses' collection on the global connection.
  def initialize(opts = {})
    super 'book_version_statuses', $mongodb, opts
  end

  # Upserts the status document for the given book version id.
  def set_book_version_status(warehouse_book_version_id, status)
    update({_id: warehouse_book_version_id}, {'$set' => {status: status}}, upsert: true)
  end
end
-
1
# Daily roll-up of data coverage: per-date totals of how many book versions
# have each scraped attribute, plus helpers that turn a total into a coverage
# ratio. coverage_for_<attribute> is handled dynamically via method_missing.
class BooklrStat < ActiveRecord::Base
  # Attributes

  attr_accessible :date, :number_of_books, :number_of_book_versions, :number_of_ingested_book_versions,
                  :number_of_book_version_stats, :amazon_average_rating_total, :amazon_review_count_total, :barnes_average_rating_total,
                  :barnes_rating_count_total, :barnes_review_count_total, :amazon_sales_rank_total, :amazon_sales_rank_category_total,
                  :barnes_sales_rank_total, :amazon_list_price_total, :amazon_price_total, :sub_category1_id_total,
                  :sub_category1_tree_total, :sub_category1_rank_total, :sub_category2_id_total, :sub_category2_tree_total,
                  :sub_category2_rank_total, :sub_category3_id_total, :sub_category3_tree_total, :sub_category3_rank_total,
                  :likes_total, :digital_list_price_total, :bn_nook_price_total, :bn_nook_list_price_total, :bn_price_total,
                  :also_bought_total, :bought_after_viewing_total, :frequently_bought_together_total, :bn_also_bought_total,
                  :similar_items_by_category_total, :amazon_related_format_data_total, :bn_related_format_data_total,
                  :author_ranks_total, :bn_list_price_total, :itunes_price_total, :itunes_average_rating_total,
                  :itunes_rating_count_total, :goodreads_work_average_rating_total, :goodreads_work_rating_count_total,
                  :goodreads_work_review_count_total, :goodreads_work_added_by_count_total, :goodreads_work_to_read_count_total,
                  :goodreads_edition_average_rating_total, :goodreads_edition_rating_count_total, :goodreads_edition_review_count_total,
                  :goodreads_edition_added_by_count_total

  # Validations

  validates_presence_of :date
  validates_uniqueness_of :date

  # Miscellaneous

  # Attribute groups used for reporting coverage by data source.
  def self.amazon_attributes
    %w[amazon_average_rating_total amazon_review_count_total amazon_sales_rank_total amazon_sales_rank_category_total
       amazon_list_price_total amazon_price_total sub_category1_id_total sub_category1_tree_total sub_category1_rank_total
       sub_category2_id_total sub_category2_tree_total sub_category2_rank_total sub_category3_id_total sub_category3_tree_total
       sub_category3_rank_total likes_total digital_list_price_total author_ranks_total]
  end

  def self.barnes_and_noble_attributes
    %w[barnes_average_rating_total barnes_rating_count_total barnes_review_count_total barnes_sales_rank_total bn_nook_price_total
       bn_nook_list_price_total bn_price_total bn_list_price_total]
  end

  def self.itunes_attributes
    %w[itunes_price_total itunes_average_rating_total itunes_rating_count_total]
  end

  def self.goodreads_attributes
    %w[goodreads_work_average_rating_total goodreads_work_rating_count_total goodreads_work_review_count_total
       goodreads_work_added_by_count_total goodreads_work_to_read_count_total goodreads_edition_average_rating_total
       goodreads_edition_rating_count_total goodreads_edition_review_count_total goodreads_edition_added_by_count_total]
  end

  def self.customer_behavior_attributes
    %w[also_bought_total bought_after_viewing_total frequently_bought_together_total bn_also_bought_total
       similar_items_by_category_total amazon_related_format_data_total bn_related_format_data_total]
  end

  # Ratio of the given total to the ingested (or overall) book-version count;
  # 0.0 when the total is blank. The attribute value is read once instead of
  # twice as in the original.
  def coverage_for(attribute)
    value = send(attribute)
    value.present? ? value / (number_of_ingested_book_versions || number_of_book_versions).to_f : 0.0
  end

  # Dynamic coverage_for_<attribute> readers, e.g. coverage_for_likes_total.
  def method_missing(method_name, *args, &block)
    method_name =~ /^coverage_for_(.+)$/ ? coverage_for($1) : super
  end

  # Keep respond_to? consistent with method_missing (was missing before,
  # so respond_to?(:coverage_for_likes_total) incorrectly returned false).
  def respond_to_missing?(method_name, include_private = false)
    method_name.to_s =~ /^coverage_for_.+$/ ? true : super
  end
end
-
1
# Per-category, per-region, per-date rank aggregates (best/worst/mean/median)
# across book versions, with links to the best- and worst-ranked versions.
class CategoryStat < ActiveRecord::Base
  attr_accessible :best_rank, :worst_rank, :book_version_count, :mean_rank, :median_rank, :category_name,
                  :best_rank_book_version, :best_rank_book_version_id, :worst_rank_book_version, :worst_rank_book_version_id,
                  :warehouse_region, :warehouse_region_id, :warehouse_date, :warehouse_date_id, :warehouse_category,
                  :warehouse_category_id

  delegate :date, to: :warehouse_date
  delegate :tld, to: :warehouse_category

  belongs_to :best_rank_book_version, class_name: 'WarehouseBookVersion', foreign_key: 'best_rank_book_version_id', inverse_of: :best_rank_category_stats
  belongs_to :worst_rank_book_version, class_name: 'WarehouseBookVersion', foreign_key: 'worst_rank_book_version_id', inverse_of: :worst_rank_category_stats
  # NOTE(review): inverse_of :warehouse_categories on the next two
  # associations looks copy-pasted — for a CategoryStat one would expect
  # inverse_of :category_stats (as used below). Confirm against the
  # WarehouseRegion/WarehouseDate models before changing.
  belongs_to :warehouse_region, inverse_of: :warehouse_categories
  belongs_to :warehouse_date, inverse_of: :warehouse_categories
  belongs_to :warehouse_category, inverse_of: :category_stats
end
-
1
# Date-sharded Mongo collection for raw category stats, e.g.
# 'category_stats_20140101' for 2014-01-01.
class CategoryStatsCollection < Mongo::Collection
  def initialize(date, opts = {})
    super "category_stats_#{date.to_s.gsub('-', '')}", $mongodb, opts
  end

  # Inserts one stats document (or an array of them) into the shard.
  def add_category_stats(category_stats)
    insert category_stats
  end
end
-
1
# Scraper for a Goodreads book page. The rating breakdown lives in an inline
# <script> containing escaped HTML, which is unescaped and re-parsed.
class GoodreadsBookPage < Page
  def self.by_key(key)
    new Urls.goodreads_book_page(key)
  end

  def initialize(url, user_agent = 'Windows Mozilla')
    super url, user_agent
  end

  # Returns a HashWithIndifferentAccess of work- and edition-level rating
  # stats plus per-star counts, or nil when the embedded markup is missing.
  def rating_details
    ratings_details = nil

    # rating details popup box is in a <script> tag with the DOM elements, find it then re-parse it
    if @page.css('#bookMeta script').present?
      text = @page.css('#bookMeta script').first.text
      # NOTE(review): after gsub("\\", '') removes all backslashes, the
      # following gsub(/\\\"/, '') can never match — presumably redundant;
      # kept as-is pending confirmation against live Goodreads markup.
      doc = Nokogiri::HTML(text.gsub("\\n", '').gsub("\\", '').gsub(/\\\"/, '').squish)

      if doc.css('table').present? && doc.css('table').count > 1
        # The last table's spans hold the nine stats in a fixed order.
        values = doc.css('table').last.css('span').collect(&:text)
        ratings_details = {goodreads_work_average_rating: values[0],
                           goodreads_work_rating_count: values[1],
                           goodreads_work_review_count: values[2],
                           goodreads_work_added_by_count: values[3],
                           goodreads_work_to_read_count: values[4],
                           goodreads_edition_average_rating: values[5],
                           goodreads_edition_rating_count: values[6],
                           goodreads_edition_review_count: values[7],
                           goodreads_edition_added_by_count: values[8]}.with_indifferent_access

        # Star-by-star counts from rows 1..5 of the distribution table
        # (row 0 is the header).
        if doc.css('table#rating_distribution').present?
          doc.css('table#rating_distribution tr')[1..5].each do |tr|
            star_count = tr.css('th').first.text.squish
            value = tr.css('td').last.text.squish
            ratings_details["goodreads_#{star_count}_star_count"] = value
          end
        end
      end
    end

    ratings_details
  end
end
-
1
# Base class for all scraped pages. Fetches the URL on construction (via
# Mechanize when a user agent is given, plain HTTP otherwise), records
# response state, and dispatches scrape_foo calls to numbered fallback
# implementations (scrape_foo_1, scrape_foo_2, ...) via method_missing.
class Page
  attr_reader :url, :user_agent, :response_code

  # Fetch errors listed in HTTP_ERRORS are swallowed and logged; callers
  # should check ok? / socks_error? / response_code afterwards.
  def initialize(url, user_agent = nil)
    @url = url
    @user_agent = user_agent
    # NOTE(review): URI.encode is deprecated and removed in Ruby 3 — replace
    # with Addressable or URI::DEFAULT_PARSER.escape before upgrading.
    @uri = URI.parse(URI.encode(url))
    @base_url = "#{@uri.scheme}://#{@uri.host}"
    @tld = case
           when @uri.hostname.ends_with?('.com')
             '.com'
           when @uri.hostname.ends_with?('.co.uk')
             '.co.uk'
           when @uri.hostname.ends_with?('.de')
             '.de'
           else
             nil
           end
    @scraped_at = Time.current
    begin
      # ||= lets subclasses (e.g. BnCategoryPage) pre-assign @page and skip
      # this fetch entirely.
      @page ||= user_agent.present? ? HttpHelper.get_html_with_mechanize_no_rescue(url, user_agent) : HttpHelper.get_html(url)
      @ok = @page.present?
      @response_code = '200'
      @socks_error = false
    rescue *HTTP_ERRORS => e
      if e.class == Mechanize::ResponseCodeError
        error = {error_class: Mechanize::ResponseCodeError.to_s, code: e.response_code}
        @response_code = e.response_code
        @error_class = e.class.to_s
      elsif e.is_a? SOCKSError
        error = {error_class: e.class.to_s}
        @socks_error = true
      else
        error = {error_class: e.class.to_s}
      end
      Utilities.log('http_error', error)
      {}
    end
  end

  # True when the fetch succeeded and produced a non-blank document.
  def ok?
    @ok
  end

  # True when the fetch failed through the SOCKS proxy.
  def socks_error?
    @socks_error
  end

  # The parsed document (nil when the fetch failed).
  def dom
    @page
  end

  # Parsed query string of the page URL. Raises if the URL has no query —
  # callers only use this on URLs known to carry one.
  def query_parameters
    CGI.parse(@uri.query)
  end

  # Dispatches scrape_foo to scrape_foo_1, scrape_foo_2, ... in order,
  # returning the first present value; nil when the page isn't ok or no
  # fallback produced a value.
  def method_missing(method_name, *args, &block)
    methods_to_call = public_methods(false).select {|method| method.to_s.starts_with?('scrape_') && method =~ /^#{Regexp.quote method_name}_\d+$/}
    if methods_to_call.present?
      return nil unless ok?

      methods_to_call.each do |method|
        value = send method
        return value if value.present?
      end

      nil
    else
      super
    end
  end

  # Keep respond_to? consistent with method_missing (previously missing, so
  # respond_to?(:scrape_price) reported false even though the call worked).
  def respond_to_missing?(method_name, include_private = false)
    public_methods(false).any? {|method| method.to_s.starts_with?('scrape_') && method =~ /^#{Regexp.quote method_name}_\d+$/} || super
  end

  private

  # Resolves a possibly-relative href against this page's scheme://host.
  def force_absolute_url(url)
    ScraperUtilities.force_absolute_url url, @base_url
  end
end
-
1
# Mongo-backed report record. Subclasses are distinguished by the
# 'report_name' field, which find/where use to re-instantiate the correct
# class via constantize.
class Report
  # Lifecycle states a report moves through.
  STATUSES = [:new, :processing, :unable_to_process, :error_while_processing, :completed].freeze

  def self.collection
    $mongodb.collection('reports')
  end

  # Builds, persists, and returns a new report.
  def self.create(doc = {})
    report = new doc
    report.save

    report
  end

  # Looks up one report by id (String ids are coerced to BSON::ObjectId when
  # legal) and instantiates it as its recorded subclass; nil when not found.
  def self.find(id)
    id = id.is_a?(String) && BSON::ObjectId.legal?(id) ? BSON::ObjectId.from_string(id) : id
    record = collection.find(_id: id).limit(1).first
    record.present? ? record['report_name'].constantize.new(record) : nil
  end

  # Queries reports, newest first. When called on a subclass, scopes the
  # query to that subclass's records. Bug fix: the original tested
  # `self.class == Report` — inside a class method self.class is Class, so
  # the test was always false — and then discarded the result of the
  # non-destructive merge, so the report_name filter was never applied.
  def self.where(conditions = {})
    conditions = conditions.merge(report_name: to_s) unless self == Report
    collection.find(conditions).sort({created_at: -1}).collect do |record|
      record['report_name'].constantize.new record
    end
  end

  # Marks the given reports as processing and appends batch_id to each
  # report's batch_ids in a single bulk update.
  def self.start_batch(ids, batch_id)
    query = ids.collect {|id| {_id: id}}
    collection.update({'$or' => query}, {'$push' => {batch_ids: batch_id}, '$set' => {status: 'processing'}}, upsert: true) if query.present?
  end

  # Merges defaults under the supplied doc; when a record with the same _id
  # already exists, the stored document wins entirely.
  def initialize(doc = {})
    @document = {_id: BSON::ObjectId.new, status: 'new', report_name: self.class.to_s, file_details: {}}.with_indifferent_access.merge doc
    @document['batch_ids'] = Array.wrap(@document['batch_ids'])
    existing_report = collection.find(_id: @document[:_id]).limit(1).first
    @document = existing_report.with_indifferent_access if existing_report.present?
    @document[:file_details] = @document[:file_details].with_indifferent_access
    @document[:file_details].each {|key, val| @document[:file_details][key] = val.with_indifferent_access}
  end

  def id
    @document['_id']
  end

  # Simple readers over the backing document.
  %w[report_name status created_at error params s3_url batch_ids file_details].each do |method_name|
    define_method method_name do
      @document[method_name]
    end
  end

  def collection
    @collection ||= Report.collection
  end

  # Upserts the backing document, stamping created_at on first save.
  def save
    raise ArgumentError unless @document[:batch_ids].is_a?(Array)

    @document['created_at'] ||= Time.current.to_time
    collection.update({_id: id}, @document, upsert: true)
  end

  def destroy
    collection.remove _id: id
  end
end
-
1
# A report representing one batch job. Requires :job_type and :batch_params
# in the constructor document.
class ReportBatch < Report
  # Raises ArgumentError naming every required key that is blank.
  def initialize(extra = {})
    extra = extra.with_indifferent_access
    missing_keys = %i[job_type batch_params].select {|key| extra[key].blank?}
    raise ArgumentError, "Missing Keys: #{missing_keys.join(', ')}" unless missing_keys.blank?

    # The original also merge!-d :job_type and :batch_params onto their own
    # current values — a no-op — which has been removed.
    super extra
  end

  # The subset of the document exposed as report params.
  def params
    @document.select {|key, _| %w[job_type batch_params].include? key}
  end

  def batch_params
    @document[:batch_params]
  end

  def job_type
    @document[:job_type]
  end
end
-
1
# Fish & Richardson title-dimensions report: physical .com titles with their
# parsed length/width/depth (inches) and top similar-item categories.
class ReportCards::FishRichardson < ReportCards::ReportCard
  @report_name = :fish_richardson

  def initialize(email, ftp, gzip, enable_report_blocking)
    super email, ftp, gzip, enable_report_blocking

    @client_name = :fish_richardson
    @report_name = self.class.report_name
    @header = ['ISBN', 'Title', 'Author', 'Format', 'Published Date', 'Length', 'Width', 'Depth', 'Similar Item Category 1',
               'Similar Item Category 2', 'Similar Item Category 3']
    # physical_details appears three times on purpose: columns 5/6/7 all start
    # as the raw dimensions string and are rewritten in output_row!.
    @report_row_keys = %w[warehouse_book_versions_isbn13 warehouse_book_versions_title warehouse_book_versions_author_name
                          warehouse_book_versions_book_format warehouse_book_versions_pub_date warehouse_book_versions_physical_details
                          warehouse_book_versions_physical_details warehouse_book_versions_physical_details
                          warehouse_stats_amazon_similar_item_category_tree_1 warehouse_stats_amazon_similar_item_category_tree_2
                          warehouse_stats_amazon_similar_item_category_tree_3]
    # Sorted ascending by id so output_row! can bsearch for membership.
    @warehouse_book_version_ids = WarehouseBookVersion.ingested.com.where{physical_details != nil}.
      where(book_format: %w[Paperback Hardcover]).where("physical_details LIKE '%inches%'").order(:id).value_of(:id)
    @expected_count = nil
  end

  # Emits one CSV row when the row's book version is in scope and its
  # dimensions parse; returns true when a row was written, false otherwise.
  def output_row!(row_keys, row)
    @warehouse_book_version_id_index ||= row_keys.index('warehouse_stats_warehouse_book_version_id')
    @physical_details_index ||= row_keys.index('warehouse_book_versions_physical_details')
    if @warehouse_book_version_ids.bsearch {|warehouse_book_version_id| row[@warehouse_book_version_id_index].to_i - warehouse_book_version_id}.present?
      output_row = generate_output_row(row)

      # output_row[5] [6] and [7] are the same field, physical_details, if it has 2 x's the data has 3 dimensions, if it has 1 x it has 2 dimensions
      # and we assume its just length and width

      if output_row[5].scan(' x ').count == 2
        # set all 3 values then sort from biggest to smallest so depth always ends up last
        dimensions = [output_row[5].split(' x ').first, output_row[6].split(' x ').second, output_row[7].split(' x ').third.split(' inches').first].map{|x| x.to_f}.sort.reverse
        output_row[5] = dimensions[0]
        output_row[6] = dimensions[1]
        output_row[7] = dimensions[2]
      elsif output_row[5].scan(' x ').count == 1
        # set both values then sort from biggest to smallest so depth always ends up last
        dimensions = [output_row[5].split(' x ').first, output_row[6].split(' x ').second.split(' inches').first].map{|x| x.to_f}.sort.reverse
        output_row[5] = dimensions[0]
        # With only two dimensions, the smaller value is treated as width when
        # over 2 inches, otherwise as depth; the other column gets 'N/A'.
        output_row[6] = dimensions[1].to_f > 2 ? dimensions[1] : 'N/A'
        output_row[7] = dimensions[1].to_f < 2 ? dimensions[1] : 'N/A'
      else
        return false
      end

      @csv << as_csv_row(output_row)

      true
    end
  end

  def send_all_complete?
    true
  end

  private

  def base_filename
    "fish-richardson-title-dimensions-report-#{@report_date.strftime('%m%d%y')}"
  end
end
-
1
# Random House corporate daily report (v3): per-title pricing, rank, category,
# and Goodreads stats for all ingested .com book versions.
class ReportCards::RandomHouseCorporate < ReportCards::ReportCard
  @report_name = :corporate

  def initialize(email, ftp, gzip, enable_report_blocking)
    super email, ftp, gzip, enable_report_blocking

    @client_name = :rhinc
    @report_name = self.class.report_name
    @header = ['ISBN', 'Title', 'Author', 'Publisher', 'Pub Date', 'Format', 'Nook Price', 'BN Nook List Price', 'BN Rank',
               'BN Date Stamp', 'BN Time Stamp', 'Amz Actual Price', 'Amz Digital List Price', 'Amz Print List Price',
               'ASIN', 'Amz Sales Rank', 'Amz Category 1', 'Amz Category 1 Rnk', 'Amz Category 2', 'Amz Category 2 Rnk',
               'Amz Category 3', 'Amz Category 3 Rnk', 'Amz Category 4', 'Amz Category 4 Rnk', 'Amz Date Stamp', 'Amz Time Stamp',
               'BN ID', 'BN Physical List Price', 'goodreads_work_average_rating', 'goodreads_work_rating_count',
               'goodreads_work_review_count', 'goodreads_work_added_by_count', 'goodreads_work_to_read_count',
               'goodreads_edition_average_rating', 'goodreads_edition_rating_count', 'goodreads_edition_review_count',
               'goodreads_edition_added_by_count', 'goodreads_work_5_star_count', 'goodreads_work_4_star_count',
               'goodreads_work_3_star_count', 'goodreads_work_2_star_count', 'goodreads_work_1_star_count']
    # Report is blocked from sending until this many rows exist.
    @expected_count = WarehouseBookVersion.com.ingested.count if enable_report_blocking
    # Positional mapping onto @header: literal '00:00:00' entries fill the
    # time-stamp columns, '' entries leave Category 4 name/rank blank.
    @report_row_keys = ['warehouse_book_versions_isbn13', 'warehouse_book_versions_title', 'warehouse_book_versions_author_name',
                        'warehouse_book_versions_publisher', 'warehouse_book_versions_pub_date', 'warehouse_book_versions_book_format',
                        'warehouse_stats_bn_nook_price', 'warehouse_stats_bn_nook_list_price', 'warehouse_stats_bn_sales_rank',
                        'warehouse_dates_date', '00:00:00', 'warehouse_stats_amazon_price', 'warehouse_stats_amazon_digital_list_price',
                        'warehouse_stats_amazon_list_price', 'warehouse_book_versions_asin', 'warehouse_stats_amazon_sales_rank',
                        'warehouse_amazon_category1_name', 'warehouse_stats_amazon_category1_rank', 'warehouse_amazon_category2_name',
                        'warehouse_stats_amazon_category2_rank', 'warehouse_amazon_category3_name', 'warehouse_stats_amazon_category3_rank',
                        '', '', 'warehouse_dates_date', '00:00:00', 'warehouse_book_versions_bn_id', 'warehouse_stats_bn_list_price',
                        'warehouse_stats_goodreads_work_average_rating', 'warehouse_stats_goodreads_work_rating_count',
                        'warehouse_stats_goodreads_work_review_count', 'warehouse_stats_goodreads_work_added_by_count',
                        'warehouse_stats_goodreads_work_to_read_count', 'warehouse_stats_goodreads_edition_average_rating',
                        'warehouse_stats_goodreads_edition_rating_count', 'warehouse_stats_goodreads_edition_review_count',
                        'warehouse_stats_goodreads_edition_added_by_count', 'warehouse_stats_goodreads_5_star_count',
                        'warehouse_stats_goodreads_4_star_count', 'warehouse_stats_goodreads_3_star_count',
                        'warehouse_stats_goodreads_2_star_count', 'warehouse_stats_goodreads_1_star_count']
  end

  def send_all_complete?
    true
  end

  private

  def base_filename
    "random-house-corporate-report-v3-#{@report_date.strftime("%m%d%y")}"
  end
end
-
1
# Random House corporate daily report (v4): identical to RandomHouseCorporate
# plus a trailing kindle_unlimited column.
class ReportCards::RandomHouseCorporate2 < ReportCards::ReportCard
  @report_name = :corporate2

  def initialize(email, ftp, gzip, enable_report_blocking)
    super email, ftp, gzip, enable_report_blocking

    @client_name = :rhinc
    @report_name = self.class.report_name
    @header = ['ISBN', 'Title', 'Author', 'Publisher', 'Pub Date', 'Format', 'Nook Price', 'BN Nook List Price', 'BN Rank',
               'BN Date Stamp', 'BN Time Stamp', 'Amz Actual Price', 'Amz Digital List Price', 'Amz Print List Price',
               'ASIN', 'Amz Sales Rank', 'Amz Category 1', 'Amz Category 1 Rnk', 'Amz Category 2', 'Amz Category 2 Rnk',
               'Amz Category 3', 'Amz Category 3 Rnk', 'Amz Category 4', 'Amz Category 4 Rnk', 'Amz Date Stamp', 'Amz Time Stamp',
               'BN ID', 'BN Physical List Price', 'goodreads_work_average_rating', 'goodreads_work_rating_count',
               'goodreads_work_review_count', 'goodreads_work_added_by_count', 'goodreads_work_to_read_count',
               'goodreads_edition_average_rating', 'goodreads_edition_rating_count', 'goodreads_edition_review_count',
               'goodreads_edition_added_by_count', 'goodreads_work_5_star_count', 'goodreads_work_4_star_count',
               'goodreads_work_3_star_count', 'goodreads_work_2_star_count', 'goodreads_work_1_star_count', 'kindle_unlimited']
    # Report is blocked from sending until this many rows exist.
    @expected_count = WarehouseBookVersion.com.ingested.count if enable_report_blocking
    # Positional mapping onto @header: literal '00:00:00' entries fill the
    # time-stamp columns, '' entries leave Category 4 name/rank blank.
    @report_row_keys = ['warehouse_book_versions_isbn13', 'warehouse_book_versions_title', 'warehouse_book_versions_author_name',
                        'warehouse_book_versions_publisher', 'warehouse_book_versions_pub_date', 'warehouse_book_versions_book_format',
                        'warehouse_stats_bn_nook_price', 'warehouse_stats_bn_nook_list_price', 'warehouse_stats_bn_sales_rank',
                        'warehouse_dates_date', '00:00:00', 'warehouse_stats_amazon_price', 'warehouse_stats_amazon_digital_list_price',
                        'warehouse_stats_amazon_list_price', 'warehouse_book_versions_asin', 'warehouse_stats_amazon_sales_rank',
                        'warehouse_amazon_category1_name', 'warehouse_stats_amazon_category1_rank', 'warehouse_amazon_category2_name',
                        'warehouse_stats_amazon_category2_rank', 'warehouse_amazon_category3_name', 'warehouse_stats_amazon_category3_rank',
                        '', '', 'warehouse_dates_date', '00:00:00', 'warehouse_book_versions_bn_id', 'warehouse_stats_bn_list_price',
                        'warehouse_stats_goodreads_work_average_rating', 'warehouse_stats_goodreads_work_rating_count',
                        'warehouse_stats_goodreads_work_review_count', 'warehouse_stats_goodreads_work_added_by_count',
                        'warehouse_stats_goodreads_work_to_read_count', 'warehouse_stats_goodreads_edition_average_rating',
                        'warehouse_stats_goodreads_edition_rating_count', 'warehouse_stats_goodreads_edition_review_count',
                        'warehouse_stats_goodreads_edition_added_by_count', 'warehouse_stats_goodreads_5_star_count',
                        'warehouse_stats_goodreads_4_star_count', 'warehouse_stats_goodreads_3_star_count',
                        'warehouse_stats_goodreads_2_star_count', 'warehouse_stats_goodreads_1_star_count',
                        'warehouse_stats_kindle_unlimited']
  end

  def send_all_complete?
    true
  end

  private

  def base_filename
    "random-house-corporate-report-v4-#{@report_date.strftime('%m%d%y')}"
  end
end
-
1
# Daily iTunes/Apple pricing report for Random House (:rhinc). Restricted to
# .com book versions that have an itunes_id; other warehouse rows are skipped
# in #output_row!.
class ReportCards::RandomHouseCorporateApple < ReportCards::ReportCard
  @report_name = :daily_apple

  def initialize(email, ftp, gzip, enable_report_blocking)
    super email, ftp, gzip, enable_report_blocking

    @client_name = :rhinc
    @report_name = self.class.report_name
    @header = ['ISBN', 'Title', 'Author', 'Publisher', 'Pub Date', 'Format', 'iTunes Price', 'iTunes Average Rating',
               'iTunes Rating Count', 'Date Stamp', 'Time Stamp']
    # '00:00:00' is a literal constant column (see ReportCard#generate_row_key_indexes).
    @report_row_keys = %w[warehouse_book_versions_isbn13 warehouse_book_versions_title warehouse_book_versions_author_name warehouse_book_versions_publisher
                          warehouse_book_versions_pub_date warehouse_book_versions_book_format warehouse_stats_itunes_price warehouse_stats_itunes_average_rating
                          warehouse_stats_itunes_rating_count warehouse_dates_date 00:00:00]
    # order(:id) matters: #output_row! bsearches this array, which requires
    # ascending order.
    @warehouse_book_version_ids = WarehouseBookVersion.com.where('itunes_id is not null').ingested.order(:id).value_of(:id)
    @expected_count = @warehouse_book_version_ids.count
  end

  # Emits the row only when its warehouse_book_version_id is in the iTunes id
  # set; returns true when written, nil (row skipped) otherwise.
  def output_row!(row_keys, row)
    # Memoized column lookup. NOTE(review): if the column were ever missing,
    # ||= would re-run the index scan for every row — assumed always present.
    @warehouse_book_version_id_index ||= row_keys.index('warehouse_stats_warehouse_book_version_id')
    # bsearch in find-any mode: block returns 0 on match, positive/negative to
    # steer the search; relies on the sorted order established in #initialize.
    if @warehouse_book_version_ids.bsearch {|warehouse_book_version_id| row[@warehouse_book_version_id_index].to_i - warehouse_book_version_id}.present?
      @csv << as_csv_row(generate_output_row(row))

      true
    end
  end

  # Participates in the batch "all complete" FTP signal.
  def send_all_complete?
    true
  end

  private

  # Basename (no extension), stamped MMDDYY.
  def base_filename
    "rh-apple-report-#{@report_date.strftime("%m%d%y")}"
  end
end
-
1
module ReportCards
  # Base class for all client report cards. Subclasses set @report_name at the
  # class level, then populate @client_name, @header, @report_row_keys (and
  # optionally @expected_count) in #initialize. The shared machinery here
  # streams warehouse rows into a CSV (optionally gzipped), sanity-checks the
  # row count, and delivers the file via FTP, email and S3.
  class ReportCard
    attr_writer :report_date

    # Class-level report identifier set by each subclass (e.g. :corporate2).
    def self.report_name
      @report_name
    end

    # Finds the subclass registered under report_name (string or symbol).
    def self.report_card_class_by_report_name(report_name)
      descendants.find {|klass| klass.report_name.to_s == report_name.to_s}
    end

    # Uploads an empty ALLFILES.DONE marker to the Random House FTP server to
    # signal that every report in the batch has been delivered.
    # NOTE(review): FTP credentials are hard-coded here and in #ftp_to_client;
    # they belong in configuration.
    def self.send_rhinc_ftp_completion
      # FTP Completion file
      file = File.open('/tmp/ALLFILES.DONE', 'w')
      ftp = Net::FTP.open('ftp.randomhouse.com', 'booklr', 'B00Klr')
      ftp.passive = true
      ftp.chdir 'to_rh'
      ftp.putbinaryfile file.path
      ftp.close
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report Delivered to RH FTP Server (#{file.path})-----"}

      # Close the handle before unlinking (previously the file was deleted
      # first and a dangling handle closed afterwards).
      file.close
      File.delete(file.path)
    end

    # email/ftp/gzip toggle delivery channels; enable_report_blocking turns on
    # the row-count gate in #row_count_valid?.
    def initialize(email, ftp, gzip, enable_report_blocking)
      @email, @ftp, @gzip, @enable_report_blocking = [email, ftp, gzip, enable_report_blocking]

      # Subclasses are expected to populate these.
      @csv = @client_name = @report_name = @header = @expected_count = nil
      @report_row_keys = []
    end

    # Redis field prefix, e.g. "rhinc_corporate2".
    def report_key
      "#{@client_name}_#{@report_name}"
    end

    # Opens the output file under the configured per-client reports directory,
    # wrapping it in a GzipWriter when @gzip is set.
    def open_csv
      file_location = File.join(AmazeBot.config[:reports][:location][Utilities.env], @client_name.to_s, filename)
      dirname = File.dirname file_location
      begin
        # File.exist? — File.exists? is deprecated and removed in Ruby 3.2.
        # The rescue covers the race where another process creates the
        # directory between the check and the mkdir.
        Dir.mkdir dirname unless File.exist? dirname
      rescue Errno::EEXIST
        # Directory already exists
      end
      @csv = File.new file_location, 'wb'
      @csv = Zlib::GzipWriter.new(@csv) if @gzip
    end

    # Resolves @report_row_keys against the warehouse column names. Each key
    # may be a '|'-separated fallback chain; parts that are not column names
    # are kept as literal strings and emitted verbatim (used for constants
    # such as '00:00:00').
    def generate_row_key_indexes(row_keys)
      @row_key_indexes = @report_row_keys.collect do |report_row_key|
        report_row_key.split('|').collect do |key_part|
          index = row_keys.index(key_part)
          index.present? ? index : key_part
        end
      end
    end

    # Row count recorded by the report generation run (0 when unset).
    def get_row_count
      $redis.hget('daily_report_stats', "#{report_key}_row_count").to_i
    end

    def set_row_count(count)
      $redis.hmset('daily_report_stats', "#{report_key}_row_count", count)
    end

    def set_time_sent(time_sent)
      $redis.hmset('daily_report_stats', "#{report_key}_completion_time", time_sent)
    end

    def insert_header!
      @csv << as_csv_row(@header) if @header.present?
    end

    # Writes one warehouse row to the CSV. Returns true so callers can count
    # emitted rows; subclasses may skip rows by returning a falsy value.
    def output_row!(row_keys, row)
      csv_row = as_csv_row(generate_output_row(row))
      @csv << csv_row

      true
    end

    def finalize_output!
      @csv.flush
      @csv.close
    end

    def move_to_s3
      uploader = ReportUploader.new
      uploader.client_name = @client_name
      uploader.store! File.new(@csv.path)

      true
    end

    # FTPs and emails the finished report when the row-count sanity check
    # passes; otherwise notifies engineering and blocks delivery.
    def deliver_report
      if row_count_valid?
        ftp_to_client
        email_client
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_key} Report Delivered -----"}
        set_time_sent Time.current.to_s
      else
        NotificationMailer.report_row_count_error(report_key, "Report blocked from sending and FTPing because todays count: #{get_row_count} was too far off the expected count: #{@expected_count}").deliver
        # Tolerance in #row_count_valid? is 0.05, i.e. 5% — the old message
        # incorrectly said "0.5% of more".
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_key} Report NOT Delivered: row count off by 5% or more -----"}
      end
    end

    # Whether this report should trigger the batch "all complete" signal;
    # subclasses opt in by overriding to true.
    def send_all_complete?
      false
    end

    private

    # Maps one warehouse row through @row_key_indexes: within each fallback
    # chain the first Integer index whose row value is present wins; String
    # entries are literal constants and always win when reached.
    def generate_output_row(row)
      @row_key_indexes.collect do |indexes|
        value = nil
        indexes.each {|index| value ||= index.is_a?(Integer) ? row[index].presence : index}

        value
      end
    end

    # NOTE(review): naive join — values containing commas or quotes are not
    # escaped; assumed safe for the warehouse data being exported.
    def as_csv_row(array)
      "#{array.join(',')}\n"
    end

    # Subclasses must override.
    def base_filename
      nil
    end

    def filename
      @gzip ? "#{base_filename}.csv.gz" : "#{base_filename}.csv"
    end

    # Valid when blocking is off, no expectation is set, or the recorded count
    # is within 5% of @expected_count.
    def row_count_valid?
      !@enable_report_blocking || @expected_count.nil? || ((@expected_count - get_row_count) / @expected_count.to_f).abs < 0.05
    end

    # Only Random House gets FTP delivery, and only in production.
    def ftp_to_client
      if Rails.env.production? && @ftp
        if @client_name.to_sym == :rhinc
          # FTP CSV File
          ftp = Net::FTP.open('ftp.randomhouse.com', 'booklr', 'B00Klr')
          ftp.passive = true
          ftp.chdir 'to_rh'
          ftp.putbinaryfile @csv.path
          ftp.close
          Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report Delivered to RH FTP Server (#{@csv.path})-----"}
        end
      end
    end

    def email_client
      return unless @email && @report_name.present?

      report_hash = {
        base_filename: base_filename,
        filename: filename,
        report_format: 'csv',
        client_name: @client_name
      }
      EnterpriseReportsMailer.basic_report(report_hash, AmazeBot.config[:reports][:clients][@client_name][:reports][@report_name].with_indifferent_access).deliver
    end
  end
end
-
1
# Customer-behavior report for Random House (:rhinc): review/rating counts,
# star breakdowns, "also bought"/"bought after viewing" fields and author
# ranks, one row per book version.
class ReportCards::RhincCustomerBehavior < ReportCards::ReportCard
  @report_name = :customer_behavior

  def initialize(email, ftp, gzip, enable_report_blocking)
    super email, ftp, gzip, enable_report_blocking

    @client_name = :rhinc
    @report_name = self.class.report_name
    # Header columns; field groups come from WarehouseStat constants so they
    # stay in sync with the warehouse schema.
    @header = %w[bn_review_count bn_average_rating amazon_review_count amazon_average_rating one_star_count two_star_count
                 three_star_count four_star_count five_star_count amazon_likes] +
              WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_FIELDS +
              WarehouseStat::WAREHOUSE_AMAZON_BOUGHT_AFTER_VIEWING_FIELDS +
              WarehouseStat::WAREHOUSE_AMAZON_FREQUENTLY_BOUGHT_TOGETHER_FIELDS +
              WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS +
              WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_ITEMS_BY_FIELDS +
              WarehouseStat::WAREHOUSE_BN_ALSO_BOUGHT_FIELDS +
              %w[overall_author_rank sub_category1_author_rank_id_name sub_category1_author_rank sub_category2_author_rank_id_name
                 sub_category2_author_rank sub_category3_author_rank_id_name sub_category3_author_rank sub_category4_author_rank_id_name
                 sub_category4_author_rank]
    # Same field groups, prefixed into warehouse column names. The 'a|b'
    # entries are fallback chains resolved by
    # ReportCard#generate_row_key_indexes (first resolvable part wins).
    @report_row_keys = (%w[bn_review_count bn_average_rating amazon_review_count amazon_average_rating one_star_count two_star_count
                           three_star_count four_star_count five_star_count amazon_likes] +
                        WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_FIELDS +
                        WarehouseStat::WAREHOUSE_AMAZON_BOUGHT_AFTER_VIEWING_FIELDS +
                        WarehouseStat::WAREHOUSE_AMAZON_FREQUENTLY_BOUGHT_TOGETHER_FIELDS +
                        WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS +
                        WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_ITEMS_BY_FIELDS +
                        WarehouseStat::WAREHOUSE_BN_ALSO_BOUGHT_FIELDS).collect {|name| "warehouse_stats_#{name}"} +
                       %w[warehouse_stats_overall_author_rank
                          sub_category1_author_rank_id_name|warehouse_stats_sub_category1_author_rank_id_fallback warehouse_stats_sub_category1_author_rank
                          sub_category2_author_rank_id_name|warehouse_stats_sub_category2_author_rank_id_fallback warehouse_stats_sub_category2_author_rank
                          sub_category3_author_rank_id_name|warehouse_stats_sub_category3_author_rank_id_fallback warehouse_stats_sub_category3_author_rank
                          sub_category4_author_rank_id_name|warehouse_stats_sub_category4_author_rank_id_fallback warehouse_stats_sub_category4_author_rank]
  end

  # Participates in the batch "all complete" FTP signal.
  def send_all_complete?
    true
  end

  private

  # Basename (no extension), stamped MMDDYY.
  def base_filename
    "rhinc-all-customer-behavior-#{@report_date.strftime("%m%d%y")}"
  end
end
-
1
# On-demand report comparing one book's price against (1) other books in its
# similar-item categories and (2) its "customers also bought" neighbours, for
# a single warehouse date. Produces average-price and price-frequency CSVs,
# uploads them to S3 and records the URLs in the report document.
class SimilarBookPricingReport < Report
  include ReportUtilities

  # Requires extra[:warehouse_date_id] and at least one of :asin / :isbn13.
  # Raises ArgumentError listing whichever keys are missing.
  def initialize(extra = {})
    @client_name = :booklr
    @extra_folders = %w[big-data-reports price-reports]
    extra = extra.with_indifferent_access
    missing_keys = []
    missing_keys << :warehouse_date_id if extra[:warehouse_date_id].blank?
    missing_keys << :asin << :isbn13 if extra[:asin].blank? && extra[:isbn13].blank?
    raise ArgumentError.new("Missing Keys: #{missing_keys.join(', ')}") unless missing_keys.blank?

    # Deterministic _id: rerunning for the same book/date reuses one document.
    extra.merge! _id: "similar-book-pricing-#{extra[:asin] || 'X'}-#{extra[:isbn13] || 'X'}-#{extra[:warehouse_date_id]}", asin: extra[:asin], isbn13: extra[:isbn13], warehouse_date_id: extra[:warehouse_date_id]

    super extra
  end

  # Just the identifying keys from the stored document.
  def params
    @document.select {|key, _| %w[asin isbn13 warehouse_date_id].include? key}
  end

  # Builds both report pairs and saves the document with statuses/URLs.
  def generate
    warehouse_date = WarehouseDate.find @document[:warehouse_date_id]
    # NOTE(review): lookup is by ASIN only, yet #initialize accepts
    # isbn13-only input — confirm callers always provide an ASIN.
    book_version = WarehouseBookVersion.find_by(asin: @document[:asin], tld: '.com')
    stat = WarehouseStat.find_by(warehouse_book_version_id: book_version.id, warehouse_date_id: warehouse_date.id)
    if stat.blank?
      # Message previously named Top100ProjectedRankReport (copy/paste error).
      @document[:error] = 'Book version is valid but has no data for today, let your big data helpers know so they can investigate. Error in SimilarBookPricingReport, this should never happen.'
      @document[:status] = :error_while_processing
    else
      # --- Pair 1: books sharing this book's similar-item categories.
      average_price_report_name = "similar-top-100-book-pricing-#{@document[:asin]}-#{warehouse_date.date.strftime('%m%d%y')}"
      price_frequency_report_name = "#{average_price_report_name}-frequency-bar-chart"
      similar_categories = stat.all_similar_item_categories
      if similar_categories.present?
        all_list_stat_prices = []
        # price => frequency over every other listed book in those categories
        # on this date (our own ASIN excluded).
        list_stat_price_distribution = WarehouseListStat.where(warehouse_category_id: similar_categories.collect(&:id), warehouse_date_id: warehouse_date.id).where.not(asin: book_version.asin).each_with_object({}) do |list_stat, hash|
          if list_stat.price.present?
            hash[list_stat.price] ||= 0
            hash[list_stat.price] += 1
            all_list_stat_prices << list_stat.price.to_i
          end
        end
        s3_url = output_average_price_report average_price_report_name, warehouse_date.date, book_version, stat, all_list_stat_prices, similar_categories
        @document[:file_details][average_price_report_name] = {s3_url: s3_url}

        s3_url = output_price_frequency_report price_frequency_report_name, warehouse_date.date, book_version, stat, list_stat_price_distribution, similar_categories
        @document[:file_details][price_frequency_report_name] = {s3_url: s3_url}
      else
        @document[:file_details][average_price_report_name] = {error: :unable_to_generate, message: 'Book had no similar item categories'}
        @document[:file_details][price_frequency_report_name] = {error: :unable_to_generate, message: 'Book had no similar item categories'}
      end

      # --- Pair 2: the book's "customers also bought" neighbours.
      average_price_report_name = "similar-also-bought-book-pricing-#{@document[:asin]}-#{warehouse_date.date.strftime('%m%d%y')}"
      price_frequency_report_name = "#{average_price_report_name}-frequency-bar-chart"
      # Warehouse stores up to six also-bought ASIN columns.
      also_bought_asins = 6.times.collect {|num| stat.send("amazon_also_bought_asin_#{num + 1}")}.compact
      if also_bought_asins.present?
        book_versions = WarehouseBookVersion.where(asin: also_bought_asins, tld: '.com')
        # NOTE(review): when none of the also-bought ASINs resolve, no
        # file_details entry is written for this pair — confirm intended.
        if book_versions.present?
          all_also_bought_prices = []
          # Block param renamed from `stat`, which shadowed the outer
          # WarehouseStat local above.
          also_bought_price_distribution = WarehouseStat.where(warehouse_book_version_id: book_versions.collect(&:id), warehouse_date_id: warehouse_date.id).each_with_object({}) do |also_bought_stat, hash|
            if also_bought_stat.amazon_price.present?
              hash[also_bought_stat.amazon_price] ||= 0
              hash[also_bought_stat.amazon_price] += 1
              all_also_bought_prices << also_bought_stat.amazon_price.to_i
            end
          end
          s3_url = output_average_price_report average_price_report_name, warehouse_date.date, book_version, stat, all_also_bought_prices
          @document[:file_details][average_price_report_name] = {s3_url: s3_url}

          s3_url = output_price_frequency_report price_frequency_report_name, warehouse_date.date, book_version, stat, also_bought_price_distribution
          @document[:file_details][price_frequency_report_name] = {s3_url: s3_url}
        end
      else
        @document[:file_details][average_price_report_name] = {error: :unable_to_generate, message: 'Book had no also bought data'}
        @document[:file_details][price_frequency_report_name] = {error: :unable_to_generate, message: 'Book had no also bought data'}
      end

      @document[:status] = :completed
    end

    save
  end

  private

  # CSV with the subject book's details plus mean/median/mode of all_prices;
  # returns the S3 URL of the uploaded file.
  def output_average_price_report(report_name, report_date, book_version, stat, all_prices, similar_categories = nil)
    price_report_hash = EnterpriseReports.generate_report_hash(report_name, @client_name)
    csv = EnterpriseReports.open_csv(price_report_hash)

    csv << %w[Title Author Format Asin Price Date]
    csv << [book_version.title, book_version.author_name, book_version.book_format, book_version.asin, "$#{as_price stat.amazon_price}", report_date.to_s]
    if similar_categories.present?
      csv << []
      csv << ['Categories used']
      similar_categories.each {|category| csv << [category.name]}
    end
    csv << ["Sample Size: #{all_prices.size} books"]
    csv << []
    csv << ['Price Mean', 'Price Median', 'Price Mode']
    csv << ["$#{as_price all_prices.mean}", "$#{as_price all_prices.median}", all_prices.mode.collect {|price| "$#{as_price price}"}.join(', ')]

    s3_url = EnterpriseReports.move_to_s3(@client_name, csv, @extra_folders)
    csv.close

    s3_url
  end

  # CSV of the raw price => frequency distribution plus bucketed price
  # ranges (~10 slices); returns the S3 URL of the uploaded file.
  def output_price_frequency_report(report_name, report_date, book_version, stat, price_distribution, similar_categories = nil)
    frequency_report_hash = EnterpriseReports.generate_report_hash(report_name, @client_name)
    csv = EnterpriseReports.open_csv(frequency_report_hash)

    csv << %w[Title Author Format Asin Price Date]
    csv << [book_version.title, book_version.author_name, book_version.book_format, book_version.asin, "$#{as_price stat.amazon_price}", report_date.to_s]
    if similar_categories.present?
      csv << []
      csv << ['Categories used']
      similar_categories.each {|category| csv << [category.name]}
    end
    csv << []
    csv << %w[Price Frequency]
    price_distribution.sort.each {|price, frequency| csv << ["$#{as_price price}", frequency]}
    csv << []
    csv << ['Price Range', 'Frequency']
    # Prices are in cents; the range is built in whole dollars then expanded
    # back to cents when counting.
    range = (price_distribution.keys.sort.first / 100.0).floor..(price_distribution.keys.sort.last / 100.0).ceil
    range.each_slice([range.count / 10, 1].max) do |slice|
      csv << ["$#{as_price(slice.first * 100)} - $#{as_price(slice.last * 100 + 99)}", price_distribution.select {|price, _| price >= slice.first * 100 && price <= slice.last * 100 + 99}.values.sum]
    end

    s3_url = EnterpriseReports.move_to_s3(@client_name, csv, @extra_folders)
    csv.close

    s3_url
  end
end
-
1
# Builds six CSVs of price-frequency distributions per top-100 category for a
# single warehouse date: list price and Amazon list price, each split into
# all / major-publisher / other-publisher variants.
class Top100PriceDistributionReport < Report
  include ReportUtilities

  # Requires extra[:warehouse_date_id]; raises ArgumentError otherwise.
  def initialize(extra = {})
    @client_name = :booklr
    extra = extra.with_indifferent_access
    raise ArgumentError.new('Missing Keys: warehouse_date_id') unless extra[:warehouse_date_id].present?

    # Deterministic _id: rerunning the same date reuses one document.
    extra.merge! _id: "top100-price-distribution-#{extra[:warehouse_date_id]}", warehouse_date_id: extra[:warehouse_date_id]

    super extra
  end

  # Just the identifying keys from the stored document.
  def params
    @document.select {|key, _| %w[warehouse_date_id].include? key}
  end

  def generate
    price_distribution = {}
    major_publisher_price_distribution = {}
    other_publisher_price_distribution = {}
    amazon_list_price_distribution = {}
    major_publisher_amazon_list_price_distribution = {}
    other_publisher_amazon_list_price_distribution = {}
    warehouse_date = WarehouseDate.find @document[:warehouse_date_id]
    # {sold_by: [...], publisher: [...]} values that mark a "major" publisher.
    major_publishers = ActiveRecord::Base.connection.execute('select type, value from warehouse_major_publishers').each_with_object({sold_by: [], publisher: []}) do |row, hash|
      hash[:sold_by] << row['value'] if row['type'] == 'sold_by'
      hash[:publisher] << row['value'] if row['type'] == 'publisher'
    end
    # warehouse_book_version_id => amazon_list_price for this date.
    prices_by_warehouse_book_version_id = WarehouseStat.where(warehouse_date_id: @document[:warehouse_date_id]).value_of(:amazon_list_price, :warehouse_book_version_id).each_with_object({}) do |price_and_id, hash|
      hash[price_and_id[1]] = price_and_id[0]
    end

    # Raw SQL so the whole day's list stats stream through one query instead
    # of instantiating ActiveRecord objects per row.
    sql = WarehouseListStat.joins('left outer join warehouse_categories on warehouse_list_stats.warehouse_category_id = warehouse_categories.id left outer join warehouse_book_versions on warehouse_list_stats.warehouse_book_version_id = warehouse_book_versions.id').
        select('warehouse_categories.name as category_name, warehouse_book_version_id, price, sold_by, publisher').
        where(warehouse_date_id: @document[:warehouse_date_id]).to_sql
    ActiveRecord::Base.connection.execute(sql).values.each do |category_name, warehouse_book_version_id, price, sold_by, publisher|
      major_publisher = major_publishers[:sold_by].include?(sold_by) || major_publishers[:publisher].include?(publisher)
      if price.present?
        add_frequency_data_to_hash price_distribution, category_name, price
        add_frequency_data_to_hash major_publisher_price_distribution, category_name, price if major_publisher
        add_frequency_data_to_hash other_publisher_price_distribution, category_name, price unless major_publisher
      end

      amazon_list_price = prices_by_warehouse_book_version_id[warehouse_book_version_id.to_i]
      if amazon_list_price.present?
        add_frequency_data_to_hash amazon_list_price_distribution, category_name, amazon_list_price
        add_frequency_data_to_hash major_publisher_amazon_list_price_distribution, category_name, amazon_list_price if major_publisher
        add_frequency_data_to_hash other_publisher_amazon_list_price_distribution, category_name, amazon_list_price unless major_publisher
      end
    end

    output_report "price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", price_distribution
    output_report "major-publisher-price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", major_publisher_price_distribution
    output_report "other-publisher-price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", other_publisher_price_distribution
    output_report "amazon-list-price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", amazon_list_price_distribution
    output_report "major-publisher-amazon-list-price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", major_publisher_amazon_list_price_distribution
    output_report "other-publisher-amazon-list-price-distribution-by-category-#{warehouse_date.date.strftime('%m%d%y')}", other_publisher_amazon_list_price_distribution

    client_config = AmazeBot.config[:reports][:clients][@client_name]
    InternalReportsMailer.basic_report(id, @document[:file_details], client_config[:reports][:top100_price_distribution]).deliver

    @document[:status] = :completed
    save
  end

  private

  # Accumulates one observed price into a per-category raw list and a
  # price => count frequency map.
  def add_frequency_data_to_hash(distribution_hash, category_name, price)
    distribution_hash[category_name] ||= {}
    distribution_hash[category_name][:prices] ||= []
    distribution_hash[category_name][:prices] << price
    distribution_hash[category_name][:price_frequencies] ||= {}
    distribution_hash[category_name][:price_frequencies][price] ||= 0
    distribution_hash[category_name][:price_frequencies][price] += 1
  end

  # Buckets each category's frequencies into fixed dollar ranges ($1 steps to
  # $19.99, $10 steps to $99, $100 steps to $999, then $1000+), writes the CSV
  # and uploads it to S3. Records an error entry when there is no data.
  def output_report(report_name, distribution_hash)
    if distribution_hash.present?
      file_location_hash = {report_location: EnterpriseReports.get_report_location(@client_name, EnterpriseReports.get_filename(report_name))}
      range_frequencies = {}
      global_range_frequencies = {}

      distribution_hash.each do |category_name, frequency_hash|
        ((0..19).to_a + (20..99).step(10).to_a + (100..999).step(100).to_a + [1000, 1000000]).each_cons(2) do |range_start, range_end|
          # Start the first bucket at one cent rather than $0.00.
          range_start = 0.01 if range_start == 0
          range_frequencies[category_name] ||= {}
          # Prices are stored in cents; range bounds are dollars.
          price_count = frequency_hash[:price_frequencies].select {|price, _| price.to_i >= range_start * 100 && price.to_i <= range_end * 100 - 1}.values.sum
          # NOTE(review): Integer#present? is true even for 0, so this branch
          # never filters empty buckets — confirm whether `price_count > 0`
          # was intended.
          if price_count.present?
            range_key = "$#{as_price(range_start * 100)} - $#{as_price(range_end * 100 - 1)}"
            range_frequencies[category_name][range_key] = price_count
            global_range_frequencies[range_key] ||= []
            global_range_frequencies[range_key] << price_count
          end
        end
      end

      csv = EnterpriseReports.open_csv(file_location_hash)
      csv << ['Category Name'] + range_frequencies.first[1].keys
      range_frequencies.each do |name, frequencies|
        csv << [name] + frequencies.values
      end
      csv << %w[Averages] + global_range_frequencies.values.collect(&:mean)
      csv << []
      # Header row is repeated after the averages block (appears intentional,
      # e.g. for charting a second section).
      csv << ['Category Name'] + range_frequencies.first[1].keys

      s3_url = EnterpriseReports.move_to_s3(@client_name, csv, %w[big-data-reports price-distributions])
      csv.close
      @document[:file_details][report_name] = {s3_url: s3_url}
    else
      @document[:file_details][report_name] = {error: :unable_to_generate, message: 'No price distribution data'}
    end
  end
end
-
1
# For one book on one warehouse date, projects where its current Amazon sales
# rank would place it inside the top-100 lists of its related / extended /
# similar categories, and how much its rank must improve to enter each list.
class Top100ProjectedRankReport < Report
  # Requires extra[:warehouse_date_id] and at least one of :asin / :isbn13.
  # Raises ArgumentError listing whichever keys are missing.
  def initialize(extra = {})
    @client_name = :booklr
    extra = extra.with_indifferent_access
    missing_keys = []
    missing_keys << :warehouse_date_id if extra[:warehouse_date_id].blank?
    missing_keys << :asin << :isbn13 if extra[:asin].blank? && extra[:isbn13].blank?
    raise ArgumentError.new("Missing Keys: #{missing_keys.join(', ')}") unless missing_keys.blank?

    # Deterministic _id: rerunning for the same book/date reuses one document.
    extra.merge! _id: "top100-projected-rank-#{extra[:asin] || 'X'}-#{extra[:isbn13] || 'X'}-#{extra[:warehouse_date_id]}", asin: extra[:asin], isbn13: extra[:isbn13], warehouse_date_id: extra[:warehouse_date_id]

    super extra
  end

  # Just the identifying keys from the stored document.
  def params
    @document.select {|key, _| %w[asin isbn13 warehouse_date_id].include? key}
  end

  # Builds the CSV, uploads it to S3 and saves status/URL on the document.
  def generate
    warehouse_date = WarehouseDate.find @document[:warehouse_date_id]
    # NOTE(review): lookup is by ASIN only even though #initialize accepts
    # isbn13-only input — confirm callers always provide an ASIN.
    book_version = WarehouseBookVersion.find_by(asin: @document[:asin], tld: '.com')
    stat = WarehouseStat.find_by(warehouse_book_version_id: book_version.id, warehouse_date_id: warehouse_date.id)
    if stat.blank?
      @document[:error] = 'Book version is valid but has no data for today, let your big data helpers know so they can investigate. Error in Top100ProjectedRankReport, this should never happen.'
      @document[:status] = :error_while_processing
    elsif stat.amazon_sales_rank.blank?
      @document[:error] = 'Daily stat has no amazon sales rank, cannot generate report'
      @document[:status] = :error_while_processing
    else
      report_name = "related-top100-ranking-#{book_version.author_name.parameterize}-#{@document[:asin]}-#{warehouse_date.date.strftime('%m%d%y')}"
      report_hash = EnterpriseReports.generate_report_hash(report_name, @client_name)
      csv = EnterpriseReports.open_csv(report_hash)

      csv << ['Title', 'Author', 'Format', 'Asin', 'Sales Rank', 'Date']
      csv << [book_version.title, book_version.author_name, book_version.book_format, book_version.asin, stat.amazon_sales_rank, warehouse_date.date.to_s]

      column_headers = ['Category Name', 'Currently Ranked?', 'Your Theoretical Position', 'Best Sales Rank', 'Sales Rank Ahead of You', 'Sales Rank Behind You', 'Worst Sales Rank', '% Improvement In Sales Rank Needed to Enter Top 100']
      insert_projection_details csv, ['Related Categories'], column_headers, stat.get_top100_rank_projections(:related_categories), stat.amazon_sales_rank
      insert_projection_details csv, ['Extended Related Categories'], column_headers, stat.get_top100_rank_projections(:extended_related_categories), stat.amazon_sales_rank
      insert_projection_details csv, ['Similar Categories'], column_headers, stat.get_top100_rank_projections(:similar_categories), stat.amazon_sales_rank
      # Deliberately disabled section, kept for reference:
      # insert_projection_details csv, ['Categories From Similar Books'], column_headers, stat.get_top100_rank_projections(:similar_books), stat

      @document[:file_details][report_name] = {s3_url: EnterpriseReports.move_to_s3(@client_name, csv, ['big-data-reports', warehouse_date.date.strftime('%m%d%y')])}
      csv.close
      @document[:status] = :completed
    end

    save
  end

  private

  # Appends one category-group section: a blank spacer, the group title, the
  # column headers, then one row per category sorted by category name.
  # A :position of 101 is the sentinel for "not in the top 100".
  def insert_projection_details(csv, major_header, column_headers, projection_details, amazon_sales_rank)
    csv << []
    csv << major_header if major_header.present?
    csv << column_headers
    projection_details.sort_by {|_, position_data| position_data[:name]}.each do |_, position_data|
      csv << [position_data[:name],
              position_data[:currently_ranked] ? 'X' : nil,
              (position_data[:position] == 101 ? 'Unranked' : position_data[:position]),
              position_data[:best_sales_rank],
              position_data[:next_sales_rank],
              (position_data[:prev_sales_rank] == 0 ? nil : position_data[:prev_sales_rank]),
              position_data[:worst_sales_rank],
              # % the sales rank must improve to beat the 100th book's rank;
              # only meaningful when currently unranked.
              (position_data[:position] == 101 ? "#{((amazon_sales_rank - position_data[:next_sales_rank] - 1) / amazon_sales_rank.to_f * 100).round}%" : nil)]
    end
  end
end
-
1
# Join model between a User and a WarehouseBookVersion. Carries a serialized
# metadata snapshot used as a fallback when the warehouse record is missing a
# field; the get_* readers below fall through warehouse -> metadata -> 'No data'.
class TrackedBookVersion < ActiveRecord::Base
  # Attributes

  attr_accessible :warehouse_book_version, :warehouse_book_version_id, :user, :user_id, :metadata

  # Positional array snapshot — inferred from the readers below:
  # [asin, title, author_name, division_code]. TODO confirm against writers.
  serialize :metadata

  # Associations

  belongs_to :warehouse_book_version, inverse_of: :tracked_book_versions
  belongs_to :user, inverse_of: :tracked_book_versions

  # Validations

  validates_presence_of :warehouse_book_version_id, :user_id
  # A user may track a given book version only once.
  validates_uniqueness_of :warehouse_book_version_id, scope: :user_id

  # Miscellaneous

  # ASIN for display: live warehouse value, else metadata snapshot.
  def get_book_version_asin
    warehouse_book_version.asin || metadata.try(:[], 0) || 'No data'
  end

  # ISBN-13 straight from the warehouse record (may be nil).
  def get_book_version_isbn13
    warehouse_book_version.isbn13
  end

  def get_book_title
    warehouse_book_version.title || metadata.try(:[], 1) || 'No data'
  end

  def get_author_name
    warehouse_book_version.author_name || metadata.try(:[], 2) || 'No data'
  end

  # Division code exists only in the metadata snapshot.
  def get_division_code
    metadata.try(:[], 3) || 'No data'
  end

  def get_book_format
    warehouse_book_version.book_format || 'No data'
  end

  # First search-result URL whose embedded ASIN matches this record's ASIN,
  # or nil. Previously used select {...}.first, re-deriving the ASIN for
  # every URL; find short-circuits and the ASIN is computed once. Only valid
  # 10-character ASINs are matched ('No data' fallback can never match).
  def match_url_on_search_page_from_metadata_asin(urls)
    asin = get_book_version_asin
    return nil unless asin.length == 10
    urls.find {|url| ScraperUtilities.extract_asin_from_url(url) == asin}
  end
end
-
1
# Devise-backed account model. Each user tracks warehouse book versions and
# carries a bitmask-encoded set of roles.
class User < ActiveRecord::Base
  # Include default devise modules. Others available are:
  # :token_authenticatable, :encryptable, :confirmable, :lockable, :timeoutable and :omniauthable
  devise :database_authenticatable, :rememberable, :trackable, :validatable

  # Setup accessible (or protected) attributes for your model
  attr_accessible :email, :password, :password_confirmation, :remember_me, :name, :validate_tracked_book_versions, :validate_tracked_book_versions_on_itunes, :tld

  # Associations

  has_many :tracked_book_versions, inverse_of: :user, dependent: :destroy
  has_many :warehouse_book_versions, through: :tracked_book_versions

  # Roles packed into the roles_mask integer column (bitmask gem).
  # Only add roles to the end of this array
  # Check out Railscast #189
  bitmask :roles_mask, values: ['admin'], as: :roles

  # Validations

  validates_presence_of :tld, :name
  validates_inclusion_of :tld, in: Utilities::TLDS

  # True when the user holds the given role (accepts symbol or string).
  def has_role?(role)
    roles.include?(role.to_s)
  end

  # Convenience predicate for the :admin role.
  def admin?
    has_role?(:admin)
  end
end
-
1
class WarehouseBookVersion < ActiveRecord::Base
-
1
mount_uploader :book_version_image, BookVersionImageUploader
-
-
1
STATUSES = [:new, :validated, :validated_from_top_100s, :ready_for_amazon_ingestion, :ingested, :page_not_found, :throttled, :external_error, :invalid_key, :invalid_key_type, :invalid_on_amazon, :no_author, :no_format, :no_results, :ambiguous_results, :duplicate_asin, :duplicate_isbn13, :duplicate_bn_id, :misassigned_asin, :api_call_in_progress].freeze
-
-
1
attr_accessible :asin, :bn_id, :book_format, :isbn13, :pub_date, :status, :title, :publisher, :sold_by, :pages, :physical_details, :author_name, :author_asin, :itunes_id, :tld, :duplicate_key, :source, :itunes_pub_date, :itunes_genres, :canonical_amazon_url, :canonical_bn_url, :canonical_goodreads_url, :amazon_book_description
-
-
# Associations
-
1
# User tracking (join model TrackedBookVersion).
has_many :tracked_book_versions, inverse_of: :warehouse_book_version, dependent: :destroy
has_many :users, through: :tracked_book_versions
# Note: this association was previously declared twice; the redundant
# duplicate declaration has been removed.
has_many :book_version_exceptions, inverse_of: :warehouse_book_version, dependent: :destroy
# Daily product-page stats; list stats survive the book version's deletion.
has_many :warehouse_stats, inverse_of: :warehouse_book_version, dependent: :destroy
has_many :warehouse_list_stats, inverse_of: :warehouse_book_version
# Cached Amazon API payload and its item rows (ordered variants below).
has_one :amazon_api_response, inverse_of: :warehouse_book_version, dependent: :destroy
has_many :amazon_api_response_items, through: :amazon_api_response
has_many :amazon_api_lookup_response_items, lambda {order :response_rank}, through: :amazon_api_response
has_many :amazon_api_search_response_items, lambda {order :response_rank}, through: :amazon_api_response
has_many :book_version_categories, inverse_of: :warehouse_book_version
# Category stats where this book holds the best / worst rank.
has_many :best_rank_category_stats, inverse_of: :best_rank_book_version
has_many :worst_rank_category_stats, inverse_of: :worst_rank_book_version
-
-
# Validations
-
1
validates_uniqueness_of :asin, allow_nil: true, scope: :tld
-
1
validates_uniqueness_of :isbn13, allow_nil: true, scope: :tld
-
1
validates_uniqueness_of :bn_id, allow_nil: true, scope: :tld
-
1
validates_uniqueness_of :itunes_id, allow_nil: true, scope: :tld
-
1
validates_presence_of :status, :tld
-
1
validates_inclusion_of :tld, in: Utilities::TLDS
-
1
validates_inclusion_of :status, in: STATUSES
-
-
# Scopes
-
29
scope :ingested, lambda {where(status: 'ingested')}
-
1
scope :not_ingested, lambda {where{status != :ingested}}
-
33
scope :com, lambda {where(tld: '.com')}
-
1
scope :couk, lambda {where(tld: '.co.uk')}
-
-
6
scope :statable, lambda {ingested}
-
5
scope :amazon_statable, lambda {statable.where{asin != nil}}
-
3
scope :bn_statable, lambda {statable.where{(isbn13 != nil) | (bn_id != nil)}}
-
3
scope :itunes_statable, lambda {statable.where{itunes_id != nil}}
-
3
scope :goodreads_statable, lambda {statable.where{(asin != nil) | (canonical_goodreads_url != nil) | (isbn13 != nil) | (bn_id != nil)}}
-
-
# Callbacks
-
-
1
after_commit :validate_on_amazon, on: :create
-
1
after_commit :add_asin_to_mongo_asin_list, on: :create
-
-
1
# after_commit(:create) hook: queues background Amazon validation, but only
# for records still in the :new status.
def validate_on_amazon
  return unless status == :new
  BookVersionValidationWorkers::ValidateNewBookVersion.perform_async(id)
end
-
-
1
# after_commit(:create) hook: registers this version's ASIN in the Mongo
# all-ASIN list; no-op when the record has no ASIN.
def add_asin_to_mongo_asin_list
  return if asin.blank?
  MongoUtilities.add_documents_to_all_asin_list(MongoUtilities.all_asin_document(asin, tld))
end
-
-
# Miscellaneous
-
-
1
# Maps a scraping scope name to the worker class that gathers its stats.
#
# @param scope [Symbol, String] one of :amazon_statable, :bn_statable,
#   :itunes_statable or :goodreads_statable (matching the scopes above)
# @return [Class] the corresponding MongoWorkers worker
# @raise [ArgumentError] for any unrecognised scope
def self.scope_to_worker_class(scope)
  case scope.to_s
  when 'amazon_statable'
    MongoWorkers::GetAmazonProductPageStats
  when 'bn_statable'
    MongoWorkers::GetBarnesAndNobleStats
  when 'itunes_statable'
    MongoWorkers::GetItunesStats
  when 'goodreads_statable'
    MongoWorkers::GetGoodreadsStats
  else
    # Idiomatic raise: class + message, instead of ArgumentError.new(...)
    raise ArgumentError, 'Bad scraping scope'
  end
end
-
-
1
# True once this version has reached the :ingested status.
def ingested?
  status.eql?(:ingested)
end
-
-
1
# The persisted status column exposed as a Symbol (nil when unset).
def status
  raw_status = read_attribute(:status)
  raw_status.try(:to_sym)
end
-
-
1
# Preferred external identifier: the ISBN-13 when set, otherwise the ASIN.
def isbn_or_asin
  isbn13.nil? ? asin : isbn13
end
-
-
1
# EAN used for Barnes & Noble lookups: ISBN-13 when set, otherwise the B&N id.
def ean
  isbn13.nil? ? bn_id : isbn13
end
-
-
1
# Product page URL on the Amazon storefront matching this version's TLD.
def amazon_url
  Urls.amazon_book_page(asin, tld)
end
-
-
1
# Product page URL on barnesandnoble.com, keyed by this version's EAN.
def bn_url
  Urls.bn_book_page(ean)
end
-
-
1
# Absorbs another book version's associations into this one: adds any users we
# don't already have, then repoints its exceptions and list stats at our id.
def copy_associations_from(other_warehouse_book_version)
  missing_users = other_warehouse_book_version.users - users
  self.users << missing_users unless missing_users.empty?

  [BookVersionException, WarehouseListStat].each do |association_class|
    association_class.where(warehouse_book_version_id: other_warehouse_book_version.id).update_all(warehouse_book_version_id: id)
  end
end
-
-
1
# Resolves a uniqueness collision on +duplicate_key+ (:asin or :isbn13) between
# this record and +existing_warehouse_book_version+, which already holds the key.
#
# Returns true when THIS record was destroyed (the caller must stop using it);
# false when this record survives and the existing one was absorbed or this
# record was flagged as a duplicate.
def resolve_duplicate_key(existing_warehouse_book_version, duplicate_key)
  # The complementary identifier: clash on isbn13 -> compare asin, and vice versa.
  other_key = (duplicate_key == :isbn13) ? :asin : :isbn13

  if existing_warehouse_book_version.status != :ingested
    # Existing record is not ingested: this record wins. Absorb the other's
    # associations and delete it.
    copy_associations_from(existing_warehouse_book_version)
    existing_warehouse_book_version.destroy
    false
  elsif existing_warehouse_book_version.send(other_key).blank?
    # Existing record is ingested but is missing the other key: it wins. Move
    # our associations and our other key over, then destroy self.
    existing_warehouse_book_version.copy_associations_from(self)
    key_to_transfer = self.send(other_key)
    destroy
    existing_warehouse_book_version.update_attributes(:"#{other_key}" => key_to_transfer)
    true
  elsif existing_warehouse_book_version.send(other_key) != self.send(other_key)
    # if the corresponding key doesn't match on both then set this as a duplicate status and set its duplicate key so we can resolve it later
    self.duplicate_key = self.send(duplicate_key)
    self.status = :"duplicate_#{duplicate_key}"
    send "#{duplicate_key}=", nil
    false
    # NOTE(review): when both records agree on the other key too, no branch
    # runs and nil falls out — callers treat that like false. Confirm intended.
  end
end
-
-
1
# Refreshes the cached Amazon API response (and its items) for this version.
#
# options:
#   :items      - pre-fetched items hash keyed by :lookup/:search/:status;
#                 when absent the Amazon API is queried.
#   :key_method - name of the method supplying the lookup key and key type
#                 (defaults to isbn_or_asin / its inferred key type).
#
# Returns the items' :status Symbol. Also copies that status onto this record
# unless it is already :ingested (with special cases below).
def update_amazon_api_response(options = {})
  if options[:items].blank?
    key = options[:key_method].present? ? self.send(options[:key_method]) : isbn_or_asin
    key_type = options[:key_method] || Utilities.determine_key_type(isbn_or_asin)
    items_hash = AmazonApi.get_all_items_by_keys_and_tld(key, key_type, tld)
    items = items_hash[key]
  else
    items = options[:items]
  end

  # Reuse the existing response row (clearing its old items and bumping
  # updated_at) or create a fresh one.
  if amazon_api_response.blank?
    create_amazon_api_response
  else
    AmazonAPIResponseItem.where(amazon_api_response_id: amazon_api_response.id).delete_all
    amazon_api_response.touch
  end

  # Persist one AmazonAPIResponseItem per returned item, for both the lookup
  # and search queries, preserving the response order via response_rank.
  [:lookup, :search].each do |query_type|
    num = 1
    items[query_type].each do |item|
      # Build the attribute hash by delegating each field to
      # AmazonApi.get_<attribute>_from_item.
      attributes = [:asin, :author, :binding, :brand, :creator, :ean, :eisbn,
        :isbn, :item_dimensions_height, :item_dimensions_height_unit, :item_dimensions_length, :item_dimensions_length_unit,
        :item_dimensions_weight, :item_dimensions_weight_unit, :item_dimensions_width, :item_dimensions_width_unit,
        :label, :large_image_url, :list_price_amount, :list_price_currency_code, :manufacturer, :medium_image_url,
        :number_of_pages, :package_dimensions_height, :package_dimensions_height_unit, :package_dimensions_length,
        :package_dimensions_length_unit, :package_dimensions_weight, :package_dimensions_weight_unit, :package_dimensions_width,
        :package_dimensions_width_unit, :publication_date, :publisher, :sales_rank, :small_image_url, :studio,
        :title].each_with_object({}) do |attribute, hash|
        hash[attribute] = AmazonApi.send "get_#{attribute}_from_item", item
      end
      amazon_api_response.amazon_api_response_items.create attributes.merge({response_rank: num, query_type: query_type})
      num += 1
    end
  end

  # Status bookkeeping: a throttled fetch downgrades to :validated rather than
  # recording :throttled, and external errors leave our status untouched.
  if status != :ingested
    if items[:status] == :throttled
      update_attributes status: :validated
    elsif items[:status] != :external_error
      update_attributes status: items[:status]
    end
  end

  items[:status]
end
-
-
1
# Promotes a :ready_for_amazon_ingestion record: pulls attributes from the
# stored Amazon API response, resolves ASIN/ISBN13 collisions with existing
# rows, persists the resulting status and queues follow-up workers.
# May destroy self via resolve_duplicate_key; no-op for any other status.
def ingest
  if status == :ready_for_amazon_ingestion
    self.status = populate_attributes_from_amazon_api
    if status == :ingested
      # Squeel block: another row with the same asin + tld but a different id?
      if asin.present? && WarehouseBookVersion.where {(asin == my{asin}) & (tld == my{tld}) & (id != my{id})}.exists?
        Rails.logger.tagged('book_data') {Rails.logger.info "ISBN13: #{isbn13} resolved to duplicate ASIN: #{asin}"}
        existing_warehouse_book_version = WarehouseBookVersion.find_by(asin: asin, tld: tld)
        return if resolve_duplicate_key(existing_warehouse_book_version, :asin) # destroys this book version when resolve_duplicate_key returns true
      elsif isbn13.present? && WarehouseBookVersion.where {(isbn13 == my{isbn13}) & (tld == my{tld}) & (id != my{id})}.exists?
        Rails.logger.tagged('book_data') {Rails.logger.info "ASIN: #{asin} resolved to duplicate ISBN13: #{isbn13}"}
        existing_warehouse_book_version = WarehouseBookVersion.find_by(isbn13: isbn13, tld: tld)
        return if resolve_duplicate_key(existing_warehouse_book_version, :isbn13) # destroys this book version when resolve_duplicate_key returns true
      end
      # Lookup iTunes id from API for this book if its a Kindle Edition
      Rails.logger.tagged('book_data') {Rails.logger.info "WarehouseBookVersion #{id} with isbn_or_asin #{isbn_or_asin} successfully ingested"}
    else
      # Resolve duplicate keys for invalid books: blank the clashing key but
      # remember it in duplicate_key for later reconciliation.
      if asin.present? && WarehouseBookVersion.where {(asin == my{asin}) & (tld == my{tld}) & (id != my{id})}.exists?
        self.duplicate_key = self.asin
        self.asin = nil
      elsif isbn13.present? && WarehouseBookVersion.where {(isbn13 == my{isbn13}) & (tld == my{tld}) & (id != my{id})}.exists?
        self.duplicate_key = self.isbn13
        self.isbn13 = nil
      end
    end
    # Saving the status on the book_version
    save!

    # Ensure the book was ingested correctly before queuing extra data
    if status == :ingested
      BookVersionWorkers::GetItunesMetadata.perform_async id if isbn13.present? && book_format.include?("Kindle")
      BookVersionWorkers::PopulateWebData.perform_async id, asin, tld, author_name
      BookVersionWorkers::PopulateCanonicalUrls.perform_async id
    end
  end
end
-
-
1
# Copies identifying attributes (asin, isbn13, format, author, title, ...) from
# the matching item of the stored Amazon API response onto self. Does NOT save.
#
# @param validate_asin [Boolean] when true, a mismatch between our asin and the
#   response item's asin aborts with :misassigned_asin instead of overwriting.
# @return [Symbol] :ingested on success, or one of the failure statuses
#   :misassigned_asin, :invalid_key, :no_format, :no_author — or whatever
#   error Symbol matching_response_item itself produced.
# @raise [RuntimeError] when no matching response item appears after ~11 polls.
def populate_attributes_from_amazon_api(validate_asin = true)
  response_item = nil
  count = 0

  # Poll for a matching response item — presumably written by a background
  # worker — sleeping 3s between attempts. Blocks the caller for up to ~33s
  # before giving up.
  loop do
    response_item = amazon_api_response.matching_response_item
    break if response_item.present?

    count = count+1
    sleep(3)
    raise 'No Matching Response Items Found' if count > 10
  end

  # matching_response_item may yield an error Symbol instead of an item record;
  # propagate it directly as this method's status.
  return response_item if response_item.class == Symbol

  # set keys
  if self.asin.blank?
    self.asin = response_item.asin
  elsif validate_asin && asin != response_item.asin
    return :misassigned_asin
  elsif !validate_asin && asin != response_item.asin
    self.asin = response_item.asin
  end

  self.isbn13 = response_item.isbn13 if isbn13.blank?

  # Reject malformed ISBN-13s; if another row already claims the same value,
  # stash it in duplicate_key and blank it before bailing out.
  if self.isbn13.present? && !ISBN_Tools.is_valid_isbn13?(self.isbn13)
    if WarehouseBookVersion.where {(isbn13 == my{isbn13}) & (tld == my{tld}) & (id != my{id})}.exists?
      self.duplicate_key = self.isbn13
      self.isbn13 = nil
    end
    return :invalid_key
  end

  # Assign book type (hardcover, paperback, etc) and title
  self.book_format = response_item.get_binding
  if book_format.blank?
    Rails.logger.tagged('book_data') {Rails.logger.info "no format, populate_attributes_from_amazon failure for book version: #{isbn_or_asin}"}
    return :no_format
  end
  # return first author, if author doesn't exist get creator (editor, etc)
  # (truncated to 255 chars to fit the column)
  author_name = response_item.author_name.try(:first, 255)
  if author_name.blank?
    Rails.logger.tagged('book_data') {Rails.logger.info "no author, populate_attributes_from_amazon failure for book version: #{isbn_or_asin}"}
    return :no_author
  end

  self.pages = response_item.number_of_pages
  self.publisher = response_item.publisher
  self.physical_details = response_item.physical_details
  # Cover image is fetched asynchronously.
  BookVersionWorkers::DownloadImage.perform_async id, response_item.medium_image_url if response_item.medium_image_url.present?
  self.pub_date = response_item.publication_date

  self.author_name = author_name
  self.title = response_item.title

  :ingested
end
-
-
1
# IDs of other book versions that share at least 3 category names with this
# one, falling back to "at least 2" when nothing clears the higher bar.
def similar_warehouse_book_version_ids
  # Count, per other book version, how many of our category names it shares.
  frequencies = BookVersionCategory.where(category_name: book_version_categories.collect(&:category_name)).where.not(warehouse_book_version_id: id).each_with_object(Hash.new(0)) do |element, hash|
    hash[element.warehouse_book_version_id] += 1
  end
  frequencies.select {|_, count| count > 2}.keys.uniq.presence || frequencies.select {|_, count| count > 1}.keys.uniq
end
-
-
1
# Up to 25 category names within this version's store namespace ('Kindle...'
# vs 'Book...') that commonly appear on similar books but not on this one,
# ordered by how often they occur.
def similar_categories
  category_prefix = book_format.include?('Kindle') ? 'Kindle%' : 'Book%'
  # Count, per other book version, how many of our namespaced category names it shares.
  book_version_frequencies = BookVersionCategory.where('category_name LIKE ?', category_prefix).where(category_name: book_version_categories.collect(&:category_name)).where.not(warehouse_book_version_id: id).each_with_object(Hash.new(0)) do |element, hash|
    hash[element.warehouse_book_version_id] += 1
  end

  # Take up to 50 book version ids, most-overlapping first (min overlap of 2).
  ids = (2..(book_version_frequencies.values.max || 0)).to_a.reverse.collect do |num|
    book_version_frequencies.select {|_, count| count == num}.keys.uniq.presence
  end.flatten.first(50)

  # Tally those versions' other category names and keep the 25 most frequent.
  BookVersionCategory.where(warehouse_book_version_id: ids).where('category_name LIKE ?', category_prefix).where.not(category_name: book_version_categories.collect(&:category_name)).each_with_object(Hash.new(0)) do |element, hash|
    hash[element.category_name] += 1
  end.sort {|x, y| y[1] <=> x[1]}.first(25).collect {|x, _| x}
end
-
end
-
1
# A node in the warehouse category taxonomy (Amazon, Barnes & Noble or Apple).
# Amazon categories carry a status: one 'canonical' row per logical category,
# with 'alternative'/'deleted' rows pointing back via canonical_category_id.
class WarehouseCategory < ActiveRecord::Base
  # Constants

  STATUSES = %w[canonical alternative deleted].freeze

  # Attributes

  attr_accessible :category_id, :depth, :name, :category_type, :tx_book_category_id, :status, :tld, :parent_id

  # Associations

  has_many :warehouse_stats, inverse_of: :warehouse_category
  has_many :warehouse_list_stats, inverse_of: :warehouse_category
  belongs_to :parent, class_name: 'WarehouseCategory', inverse_of: :children
  has_many :children, class_name: 'WarehouseCategory', foreign_key: 'parent_id', inverse_of: :parent
  belongs_to :canonical_category, class_name: 'WarehouseCategory', inverse_of: :alternative_categories
  # Currently grabs alternative and deleted categories, despite association name. Couldn't think of a good name to encompass both
  has_many :alternative_categories, class_name: 'WarehouseCategory', foreign_key: 'canonical_category_id', inverse_of: :canonical_category
  has_many :book_version_categories, inverse_of: :warehouse_category
  has_many :warehouse_book_versions, through: :book_version_categories
  has_many :category_stats, inverse_of: :warehouse_category

  # Validations

  validates_presence_of :name, :category_type
  validates_inclusion_of :tld, in: Utilities::TLDS

  # Status bookkeeping only applies to Amazon categories.
  validates_presence_of :status, :tld, if: :amazon_category?
  validates_inclusion_of :status, in: STATUSES + STATUSES.collect(&:to_sym), if: :amazon_category?
  validate :uniqueness_of_canonical_category_id_and_name, if: :amazon_category?
  validate :uniqueness_of_name_per_category_id_and_tld, if: :amazon_category?

  # Scopes

  scope :canonical, lambda {where(status: 'canonical')}
  scope :alternative, lambda {where(status: 'alternative')}
  scope :active, lambda {where{(status == 'canonical') | (status == 'alternative')}}
  scope :deleted, lambda {where(status: 'deleted')}
  scope :com, lambda {where(tld: '.com')}
  scope :couk, lambda {where(tld: '.co.uk')}
  scope :amazon, lambda {where(category_type: 'AmazonBookCategory')}
  scope :barnes_and_noble, lambda {where(category_type: 'BarnesNobleBookCategory')}
  scope :apple, lambda {where(category_type: 'AppleBookCategory')}

  # Miscellaneous

  # Custom validation: a canonical Amazon category's category_id and name must
  # each be unique among canonical Amazon categories within the same tld.
  def uniqueness_of_canonical_category_id_and_name
    unless status.blank?
      if category_id.present?
        %w[category_id name].each do |attribute|
          if status.to_sym == :canonical && WarehouseCategory.amazon.canonical.where(:"#{attribute}" => send(attribute), tld: tld).where{id != my{id}}.exists?
            errors.add :"#{attribute}", 'has already been taken'
          end
        end
      end
    end
  end

  # Custom validation: name must be unique per (category_id, tld) among Amazon categories.
  def uniqueness_of_name_per_category_id_and_tld
    errors.add :name, 'has already been taken for this category id and tld' if WarehouseCategory.amazon.where(name: name, category_id: category_id, tld: tld).where{id != my{id}}.exists?
  end

  # Adds in canonical?, alternative?, and deleted? predicates derived from STATUSES.
  def method_missing(method_name, *args, &block)
    method_name =~ /^(#{STATUSES.join('|')})\?$/ ? status == Regexp.last_match(1) : super
  end

  # Keep respond_to?/method() honest about the dynamic predicates that
  # method_missing serves (Ruby convention: always pair the two).
  def respond_to_missing?(method_name, include_private = false)
    method_name.to_s =~ /^(#{STATUSES.join('|')})\?$/ ? true : super
  end

  def canonical?
    status == 'canonical'
  end

  def amazon_category?
    category_type == 'AmazonBookCategory'
  end

  # B&N physical-book categories carry category_ids starting with '1'.
  def bn_physical_category?
    category_type == 'BarnesNobleBookCategory' && category_id.starts_with?('1')
  end

  # Last segment of the '>'-delimited category path.
  def leaf_name
    name.split('>').last.strip
  end

  def set_parent_id
    self.parent_id = WarehouseCategory.get_parent_id_for_category_name name, tld
  end

  # Id of the parent category (everything before the last ' > ' segment),
  # preferring a canonical row, then an alternative one, then anything.
  # Returns nil for root names (no '>') or when no parent row exists.
  def self.get_parent_id_for_category_name(category_name, tld)
    if category_name.count('>') > 0
      parents = where name: category_name.split(/ > /)[0..-2].join(' > '), tld: tld
      parent = parents.select {|category| category.canonical?}.first || parents.select {|category| category.status == 'alternative'}.first || parents.first
      parent.id if parent.present?
    end
  end

  def related_categories
    WarehouseCategory.related_categories self
  end

  # Canonical categories that are children of the given categories (or of their
  # parents), plus the canonical counterparts of any alternative children.
  def self.related_categories(categories)
    categories_array = Array.wrap categories
    all_category_ids = (categories_array.collect(&:id) + categories_array.collect(&:parent_id).compact).uniq
    canonical_category_id_by_alternative_parent_id = WarehouseCategory.alternative.com.where(parent_id: all_category_ids).value_of(:parent_id, :canonical_category_id).each_with_object({}) do |values, hash|
      hash[values[0]] ||= []
      hash[values[0]] << values[1]
    end
    canonical_category_ids = all_category_ids.collect {|category_id| canonical_category_id_by_alternative_parent_id[category_id]}.flatten.compact.uniq

    (WarehouseCategory.canonical.where(parent_id: all_category_ids) + WarehouseCategory.find(canonical_category_ids)).uniq
  end

  # Average of +column+ across this category's top-100 list stats on the given date.
  def average_daily_top100(column, warehouse_date)
    WarehouseListStat.where(warehouse_category_id: id, warehouse_date_id: warehouse_date.id).average(column)
  end

  # Average of +column+ across all stats of this category's book versions on the given date.
  def average_daily(column, warehouse_date)
    WarehouseStat.where(warehouse_book_version_id: warehouse_book_version_ids, warehouse_date_id: warehouse_date.id).average(column)
  end
end
-
1
# A calendar date that warehouse stats are keyed against.
class WarehouseDate < ActiveRecord::Base
  attr_accessible :date

  # Associations
  has_many :warehouse_stats, inverse_of: :warehouse_date
  has_many :warehouse_list_stats, inverse_of: :warehouse_date
  has_many :book_version_exceptions, inverse_of: :warehouse_date
  has_many :warehouse_categories, inverse_of: :warehouse_date

  # Validations
  validates_presence_of :date

  # The row for today's date, or nil when none exists yet.
  def self.current
    where(date: Date.current).take
  end
end
-
1
# One entry of a per-category top-100 bestseller list snapshot for a date.
class WarehouseListStat < ActiveRecord::Base
  attr_accessible :warehouse_book_version, :warehouse_book_version_id, :warehouse_category, :warehouse_category_id, :warehouse_date, :warehouse_date_id, :warehouse_trend, :warehouse_trend_id, :days_in_top_100, :name, :rank, :price, :author, :title, :asin, :isbn, :bn_id, :itunes_id, :list_type

  # Associations
  belongs_to :warehouse_category, inverse_of: :warehouse_list_stats
  # warehouse_book_version may be absent — list entries exist even for books
  # the warehouse does not track (see blank? checks in WarehouseStat).
  belongs_to :warehouse_book_version, inverse_of: :warehouse_list_stats
  belongs_to :warehouse_trend, inverse_of: :warehouse_list_stats
  belongs_to :warehouse_date, inverse_of: :warehouse_list_stats

  # Validations
  validates_presence_of :warehouse_category_id, :warehouse_date_id, :rank
end
-
1
# A storefront region, identified by its Amazon TLD ('.com', '.co.uk', ...).
class WarehouseRegion < ActiveRecord::Base
  # Associations
  has_many :warehouse_stats, inverse_of: :warehouse_region
  has_many :book_version_categories, inverse_of: :warehouse_region
  has_many :warehouse_categories, inverse_of: :warehouse_region

  attr_accessible :tld

  # Validations
  validates_presence_of :tld

  # Miscellaneous

  # The US storefront region.
  def self.com
    find_by(tld: '.com')
  end

  # The UK storefront region.
  def self.couk
    find_by(tld: '.co.uk')
  end
end
-
1
class WarehouseStat < ActiveRecord::Base
-
1
belongs_to :warehouse_book_version, inverse_of: :warehouse_stats
-
1
belongs_to :warehouse_date, inverse_of: :warehouse_stats
-
1
belongs_to :warehouse_region, inverse_of: :warehouse_stats
-
-
# main sales rank + 3 sub category rankings
-
1
belongs_to :warehouse_amazon_sales_rank_category, :class_name => 'WarehouseCategory', :foreign_key => 'warehouse_amazon_sales_rank_category_id', inverse_of: :warehouse_stats
-
1
belongs_to :warehouse_amazon_category1, :class_name => 'WarehouseCategory', :foreign_key => 'warehouse_amazon_category1_id', inverse_of: :warehouse_stats
-
1
belongs_to :warehouse_amazon_category2, :class_name => 'WarehouseCategory', :foreign_key => 'warehouse_amazon_category2_id', inverse_of: :warehouse_stats
-
1
belongs_to :warehouse_amazon_category3, :class_name => 'WarehouseCategory', :foreign_key => 'warehouse_amazon_category3_id', inverse_of: :warehouse_stats
-
-
1
belongs_to :sub_category1_author_rank_id, :class_name => 'WarehouseCategory', :foreign_key => 'sub_category1_author_rank_id'
-
1
belongs_to :sub_category2_author_rank_id, :class_name => 'WarehouseCategory', :foreign_key => 'sub_category2_author_rank_id'
-
1
belongs_to :sub_category3_author_rank_id, :class_name => 'WarehouseCategory', :foreign_key => 'sub_category3_author_rank_id'
-
1
belongs_to :sub_category4_author_rank_id, :class_name => 'WarehouseCategory', :foreign_key => 'sub_category4_author_rank_id'
-
-
1
attr_accessible :warehouse_book_version, :warehouse_date, :warehouse_region,
-
:warehouse_region_id, :warehouse_amazon_category1_id, :warehouse_amazon_category2_id,
-
:warehouse_amazon_category3_id, :amazon_aus_price, :amazon_average_rating, :amazon_digital_list_price,
-
:amazon_euro_price, :amazon_likes, :amazon_list_price, :amazon_price, :amazon_review_count, :amazon_sales_rank,
-
:warehouse_amazon_sales_rank_category_id, :bn_average_rating, :bn_list_price, :bn_nook_list_price, :bn_nook_price,
-
:bn_price, :bn_review_count, :bn_sales_rank, :amazon_category1_rank, :amazon_category2_rank, :amazon_category3_rank,
-
:amazon_days_in_top_100_1, :amazon_days_in_top_100_2, :amazon_days_in_top_100_3, :amazon_top_100_trend1,
-
:amazon_top_100_trend2, :amazon_top_100_trend3, :delicious_count, :digg_count, :fb_click_count, :fb_comment_count,
-
:fb_commentsbox_count, :fb_like_count, :fb_share_count, :google_plus_count, :itunes_average_rating,
-
:itunes_price, :itunes_rating_count, :linkedin_count, :pinterest_count, :reddit_count, :stumbleupon_count,
-
:twitter_count, :five_star_count, :four_star_count, :three_star_count, :two_star_count, :one_star_count, :tx_book_version_stat_id,
-
:overall_author_rank, :sub_category1_author_rank, :sub_category1_author_rank_id, :sub_category2_author_rank,
-
:sub_category2_author_rank_id, :sub_category3_author_rank, :sub_category3_author_rank_id, :sub_category4_author_rank,
-
:sub_category4_author_rank_id, :amazon_availability, :kindle_unlimited
-
-
# Delegations
-
1
delegate :date, to: :warehouse_date
-
1
delegate :tld, :asin, :isbn13, to: :warehouse_book_version
-
-
# Validations
-
1
validates_presence_of :warehouse_book_version
-
1
validates_presence_of :warehouse_date
-
1
validates_presence_of :warehouse_region
-
-
# Constants
-
1
WAREHOUSE_STAT_FIELDS = %w[warehouse_amazon_category1_id amazon_category1_rank warehouse_amazon_category2_id amazon_category2_rank warehouse_amazon_category3_id amazon_category3_rank tx_book_version_stat_id warehouse_date_id warehouse_region_id warehouse_book_version_id warehouse_amazon_sales_rank_category_id amazon_sales_rank bn_sales_rank amazon_likes amazon_list_price amazon_price amazon_digital_list_price bn_nook_price bn_nook_list_price amazon_euro_price amazon_aus_price bn_price bn_list_price itunes_price itunes_average_rating itunes_rating_count stumbleupon_count reddit_count fb_commentsbox_count fb_click_count fb_comment_count fb_like_count fb_share_count delicious_count google_plus_count twitter_count digg_count pinterest_count linkedin_count amazon_average_rating amazon_review_count bn_average_rating bn_review_count five_star_count four_star_count three_star_count two_star_count one_star_count overall_author_rank sub_category1_author_rank sub_category1_author_rank_id sub_category2_author_rank sub_category2_author_rank_id sub_category3_author_rank sub_category3_author_rank_id sub_category4_author_rank sub_category4_author_rank_id itunes_gb_average_rating itunes_gb_rating_count itunes_gb_price itunes_au_average_rating itunes_au_rating_count itunes_au_price amazon_availability kindle_unlimited].freeze
-
1
WAREHOUSE_AMAZON_ALSO_BOUGHT_FIELDS = %w[amazon_also_bought_title_1 amazon_also_bought_asin_1 amazon_also_bought_author_1 amazon_also_bought_price_1 amazon_also_bought_rating_1 amazon_also_bought_star_count_1 amazon_also_bought_title_2 amazon_also_bought_asin_2 amazon_also_bought_author_2 amazon_also_bought_price_2 amazon_also_bought_rating_2 amazon_also_bought_star_count_2 amazon_also_bought_title_3 amazon_also_bought_asin_3 amazon_also_bought_author_3 amazon_also_bought_price_3 amazon_also_bought_rating_3 amazon_also_bought_star_count_3 amazon_also_bought_title_4 amazon_also_bought_asin_4 amazon_also_bought_author_4 amazon_also_bought_price_4 amazon_also_bought_rating_4 amazon_also_bought_star_count_4 amazon_also_bought_title_5 amazon_also_bought_asin_5 amazon_also_bought_author_5 amazon_also_bought_price_5 amazon_also_bought_rating_5 amazon_also_bought_star_count_5 amazon_also_bought_title_6 amazon_also_bought_asin_6 amazon_also_bought_author_6 amazon_also_bought_price_6 amazon_also_bought_rating_6 amazon_also_bought_star_count_6].freeze
-
1
WAREHOUSE_AMAZON_BOUGHT_AFTER_VIEWING_FIELDS = %w[amazon_bought_after_viewing_title_1 amazon_bought_after_viewing_asin_1 amazon_bought_after_viewing_author_1 amazon_bought_after_viewing_rating_1 amazon_bought_after_viewing_star_count_1 amazon_bought_after_viewing_price_1 amazon_bought_after_viewing_title_2 amazon_bought_after_viewing_asin_2 amazon_bought_after_viewing_author_2 amazon_bought_after_viewing_rating_2 amazon_bought_after_viewing_star_count_2 amazon_bought_after_viewing_price_2 amazon_bought_after_viewing_title_3 amazon_bought_after_viewing_asin_3 amazon_bought_after_viewing_author_3 amazon_bought_after_viewing_rating_3 amazon_bought_after_viewing_star_count_3 amazon_bought_after_viewing_price_3 amazon_bought_after_viewing_title_4 amazon_bought_after_viewing_asin_4 amazon_bought_after_viewing_author_4 amazon_bought_after_viewing_rating_4 amazon_bought_after_viewing_star_count_4 amazon_bought_after_viewing_price_4].freeze
-
1
WAREHOUSE_AMAZON_FREQUENTLY_BOUGHT_TOGETHER_FIELDS = %w[amazon_frequently_bought_together_format_1 amazon_frequently_bought_together_price_1 amazon_frequently_bought_together_title_1 amazon_frequently_bought_together_format_2 amazon_frequently_bought_together_price_2 amazon_frequently_bought_together_title_2].freeze
-
16
WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS = Array.new(15) {|num| "amazon_similar_item_category_tree_#{num + 1}"}.freeze
-
16
WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_EXTERNAL_ID_FIELDS = Array.new(15) {|num| "amazon_similar_item_category_external_id_#{num + 1}"}.freeze
-
1
WAREHOUSE_AMAZON_RELATED_FORMAT_FIELDS = %w[related_formats_kindle_price related_formats_kindle_asin related_formats_mass_market_paperback_price related_formats_mass_market_paperback_asin related_formats_nook_price related_formats_nook_ean related_formats_hardcover_price related_formats_hardcover_asin related_formats_paperback_price related_formats_paperback_asin].freeze
-
17
WAREHOUSE_AMAZON_ALSO_BOUGHT_ITEMS_BY_FIELDS = Array.new(16) {|num| "amazon_also_bought_items_by_#{num + 1}"}.freeze
-
1
WAREHOUSE_AMAZON_AUTHOR_RANK_FIELDS = %w[overall_author_rank sub_category1_author_rank sub_category2_author_rank sub_category3_author_rank sub_category4_author_rank sub_category1_author_rank_id sub_category2_author_rank_id sub_category3_author_rank_id sub_category4_author_rank_id].freeze
-
1
WAREHOUSE_GOODREADS_FIELDS = %w[goodreads_work_average_rating goodreads_work_rating_count goodreads_work_review_count goodreads_work_added_by_count goodreads_work_to_read_count goodreads_edition_average_rating goodreads_edition_rating_count goodreads_edition_review_count goodreads_edition_added_by_count goodreads_5_star_count goodreads_4_star_count goodreads_3_star_count goodreads_2_star_count goodreads_1_star_count].freeze
-
1
WAREHOUSE_BN_ALSO_BOUGHT_FIELDS = %w[bn_also_bought_title_1 bn_also_bought_ean_1 bn_also_bought_author_1 bn_also_bought_price_1 bn_also_bought_title_2 bn_also_bought_ean_2 bn_also_bought_author_2 bn_also_bought_price_2 bn_also_bought_title_3 bn_also_bought_ean_3 bn_also_bought_author_3 bn_also_bought_price_3 bn_also_bought_title_4 bn_also_bought_ean_4 bn_also_bought_author_4 bn_also_bought_price_4 bn_also_bought_title_5 bn_also_bought_ean_5 bn_also_bought_author_5 bn_also_bought_price_5 bn_also_bought_title_6 bn_also_bought_ean_6 bn_also_bought_author_6 bn_also_bought_price_6].freeze
-
-
# Miscellaneous
-
-
1
# Non-nil values of the 15 amazon_similar_item_category_tree_X columns.
def amazon_similar_item_category_names
  WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS.map { |field| send(field) }.compact
end
-
-
1
# Non-nil values of the 15 amazon_similar_item_category_external_id_X columns.
def amazon_similar_item_category_external_ids
  WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_EXTERNAL_ID_FIELDS.map { |field| send(field) }.compact
end
-
-
# All .com warehouse categories named by this stat's similar-item columns,
# together with the canonical counterpart of every matched category that has one.
def all_similar_item_categories
  matched_categories = WarehouseCategory.com.where(name: amazon_similar_item_category_names).to_a
  canonical_counterparts = matched_categories.collect(&:canonical_category).compact

  matched_categories + canonical_counterparts
end
-
-
# Categories related to those in the amazon_similar_item_category_tree_X
# fields, restricted to the book's store ('Kindle Store...' vs everything else).
def related_categories
  kindle_categories, physical_categories = all_similar_item_categories.partition do |category|
    category.name.start_with?('Kindle Store')
  end
  relevant = warehouse_book_version.book_format.include?('Kindle') ? kindle_categories : physical_categories

  WarehouseCategory.related_categories relevant
end
-
-
1
# Related categories of the books recorded in this stat's also-bought and
# bought-after-viewing ASIN slots, for the same region and date.
def extended_related_categories
  also_bought_asins = (1..6).collect {|num| send "amazon_also_bought_asin_#{num}"}.compact
  bought_after_viewing_asins = (1..4).collect {|num| send "amazon_bought_after_viewing_asin_#{num}"}.compact
  book_version_ids = WarehouseBookVersion.where(asin: (also_bought_asins + bought_after_viewing_asins).uniq, tld: warehouse_region.tld).value_of(:id)
  stats = WarehouseStat.where(warehouse_book_version_id: book_version_ids, warehouse_date_id: warehouse_date.id)
  stats.collect(&:related_categories).flatten.uniq
end
-
-
1
# Projects where this book would land in each candidate category's top-100
# list for this stat's date.
#
# @param strategy [Symbol] selects the candidate categories:
#   :related_categories, :extended_related_categories, :similar_books or
#   :similar_categories.
# @return [Hash] keyed by warehouse_category_id; each value holds
#   :currently_ranked, :position (101 = off-list), :next_sales_rank,
#   :prev_sales_rank, :name, plus :best_sales_rank/:worst_sales_rank when
#   any comparable entry was found.
def get_top100_rank_projections(strategy)
  categories = case strategy
  when :related_categories
    related_categories
  when :extended_related_categories
    extended_related_categories
  when :similar_books
    # Categories of books that share many categories with ours (canonical rows
    # only, resolving alternatives to their canonical counterparts).
    category_names = BookVersionCategory.where(warehouse_book_version_id: warehouse_book_version.similar_warehouse_book_version_ids).value_of :category_name
    (WarehouseCategory.amazon.com.canonical.where(name: category_names) + WarehouseCategory.amazon.com.alternative.where(name: category_names).collect(&:canonical_category)).uniq.compact
  when :similar_categories
    (WarehouseCategory.amazon.com.canonical.where(name: warehouse_book_version.similar_categories) + WarehouseCategory.amazon.com.alternative.where(name: warehouse_book_version.similar_categories).collect(&:canonical_category)).uniq.compact
  end
  # Seed every candidate with off-list defaults (position 101 = not in top 100).
  details = categories.each_with_object({}) do |related_category, hash|
    hash[related_category.id] = {currently_ranked: false, position: 101, next_sales_rank: 0, prev_sales_rank: 0, name: related_category.name}
  end
  list_stats = WarehouseListStat.where(warehouse_category_id: details.keys, warehouse_date_id: warehouse_date_id)
  # Index the same-day overall stats by book version for sales-rank comparisons.
  stats_by_book_version_id = WarehouseStat.where(warehouse_book_version_id: list_stats.collect(&:warehouse_book_version_id).compact, warehouse_date_id: warehouse_date_id).each_with_object({}.with_indifferent_access) do |warehouse_stat, hash|
    hash[warehouse_stat.warehouse_book_version_id] = warehouse_stat
  end
  list_stats.each do |list_stat|
    details[list_stat.warehouse_category_id][:currently_ranked] = true if list_stat.asin == warehouse_book_version.asin
    # Can't compare without both the list entry's book and our own sales rank.
    next if list_stat.warehouse_book_version_id.blank? || amazon_sales_rank.blank?
    if stats_by_book_version_id[list_stat.warehouse_book_version_id].present? && stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank.present?
      # We outrank this entry: our projected position is the best such slot,
      # remembering the sales rank we'd displace.
      if amazon_sales_rank <= stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank && details[list_stat.warehouse_category_id][:position] > list_stat.rank
        details[list_stat.warehouse_category_id][:position] = list_stat.rank
        details[list_stat.warehouse_category_id][:prev_sales_rank] = stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank
      # We rank below this entry: track the largest sales rank still above us.
      elsif amazon_sales_rank > stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank && details[list_stat.warehouse_category_id][:next_sales_rank] < stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank
        details[list_stat.warehouse_category_id][:next_sales_rank] = stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank
      end

      # Track the extreme sales ranks seen in this category's list.
      if details[list_stat.warehouse_category_id][:best_sales_rank].blank? || stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank < details[list_stat.warehouse_category_id][:best_sales_rank]
        details[list_stat.warehouse_category_id][:best_sales_rank] = stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank
      end

      if details[list_stat.warehouse_category_id][:worst_sales_rank].blank? || stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank > details[list_stat.warehouse_category_id][:worst_sales_rank]
        details[list_stat.warehouse_category_id][:worst_sales_rank] = stats_by_book_version_id[list_stat.warehouse_book_version_id].amazon_sales_rank
      end
    end
  end

  details
end
-
-
1
# Builds, for each stat in +stats+, a projection of where its book could land on
# the Kindle Store top-100 lists for +warehouse_date+.  Returns a hash keyed by
# warehouse_book_version_id, then by warehouse_category_id, each entry holding:
#   currently_ranked - true when this stat's book already appears on that list
#   position         - best list position this book's sales rank beats (101 = off-list)
#   prev_sales_rank  - sales rank of the entry currently holding that position
#   next_sales_rank  - closest sales rank still ahead of this book
#   best_/worst_sales_rank - sales-rank extremes observed on the list
# NOTE(review): the +extended+ parameter is never used in this method body —
# confirm whether callers still need to pass it.
def self.top100_rank_projections(warehouse_date, stats, extended)
  # alternative-category parent_id => [canonical_category_id, ...]
  # (value_of is from the valium gem: plucks columns without instantiating models)
  canonical_category_id_by_alternative_parent_id = WarehouseCategory.alternative.com.value_of(:parent_id, :canonical_category_id).each_with_object({}) do |values, hash|
    hash[values[0]] ||= []
    hash[values[0]] << values[1]
  end
  # Only Kindle Store categories referenced by any of the given stats.
  all_similar_item_category_names = stats.collect(&:amazon_similar_item_category_names).flatten.uniq.select {|name| name.start_with?('Kindle Store')}
  initial_categories = WarehouseCategory.com.where(name: all_similar_item_category_names).to_a
  base_categories = (initial_categories + initial_categories.collect(&:canonical_category).compact)
  all_category_ids = (base_categories.collect(&:id) + base_categories.collect(&:parent_id).compact).uniq
  canonical_category_ids = all_category_ids.collect {|category_id| canonical_category_id_by_alternative_parent_id[category_id]}.flatten.compact.uniq
  all_categories = (WarehouseCategory.canonical.where(parent_id: all_category_ids) + WarehouseCategory.find(canonical_category_ids)).flatten.uniq

  # category id => [[warehouse_book_version_id, warehouse_category_id, rank], ...]
  # chunk depends on order(:warehouse_category_id) to group contiguous rows.
  list_stat_values_by_category_id = WarehouseListStat.where(warehouse_category_id: all_categories.collect(&:id), warehouse_date_id: warehouse_date.id).order(:warehouse_category_id).value_of(:warehouse_book_version_id, :warehouse_category_id, :rank).chunk {|_, warehouse_category_id, _| warehouse_category_id}.each_with_object({}) do |values, hash|
    hash[values[0]] = values[1]
  end
  warehouse_book_version_ids = list_stat_values_by_category_id.collect {|_, values| values.collect(&:first)}.flatten.uniq
  # book version id => that book's overall Amazon sales rank for the date
  amazon_sales_rank_by_book_version_id = WarehouseStat.where(warehouse_book_version_id: warehouse_book_version_ids, warehouse_date_id: warehouse_date.id).value_of(:warehouse_book_version_id, :amazon_sales_rank).each_with_object({}.with_indifferent_access) do |warehouse_stat_values, hash|
    hash[warehouse_stat_values[0]] = warehouse_stat_values[1]
  end

  stats.each_with_object({}.with_indifferent_access) do |stat, hash|
    # Narrow the category universe down to the ones this stat's book belongs to.
    my_category_names = stat.amazon_similar_item_category_names
    my_filtered_categories = base_categories.select {|category| my_category_names.include? category.name}
    my_category_ids = my_filtered_categories.collect(&:id) + my_filtered_categories.collect(&:parent_id).compact
    my_canonical_category_ids = my_category_ids.collect {|category_id| canonical_category_id_by_alternative_parent_id[category_id]}.flatten.compact.uniq
    my_categories = all_categories.select {|category| my_category_ids.include?(category.parent_id) || my_canonical_category_ids.include?(category.id)}

    # position starts at 101, i.e. "not within the top 100" until beaten below
    details = my_categories.each_with_object({}.with_indifferent_access) do |related_category, details_hash|
      details_hash[related_category.id] = {currently_ranked: false, position: 101, next_sales_rank: 0, prev_sales_rank: 0, name: related_category.name}
    end
    my_categories.collect(&:id).each do |my_category_id|
      list_stat_values_by_category_id[my_category_id].each do |warehouse_book_version_id, warehouse_category_id, rank|
        # Skip list rows we cannot compare against: anonymous entries or missing sales ranks.
        next unless warehouse_book_version_id.present? && amazon_sales_rank_by_book_version_id[warehouse_book_version_id].present? && stat.amazon_sales_rank.present?

        details[warehouse_category_id][:currently_ranked] = true if stat.warehouse_book_version_id == warehouse_book_version_id
        if stat.amazon_sales_rank <= amazon_sales_rank_by_book_version_id[warehouse_book_version_id] && details[warehouse_category_id][:position] > rank
          # We outrank this entry: record the best (lowest) list position beatable so far.
          details[warehouse_category_id][:position] = rank
          details[warehouse_category_id][:prev_sales_rank] = amazon_sales_rank_by_book_version_id[warehouse_book_version_id]
        elsif stat.amazon_sales_rank > amazon_sales_rank_by_book_version_id[warehouse_book_version_id] && details[warehouse_category_id][:next_sales_rank] < amazon_sales_rank_by_book_version_id[warehouse_book_version_id]
          # Entry is ahead of us: track the closest sales rank we'd need to beat.
          details[warehouse_category_id][:next_sales_rank] = amazon_sales_rank_by_book_version_id[warehouse_book_version_id]
        end

        # Track sales-rank extremes seen anywhere on this list.
        if details[warehouse_category_id][:best_sales_rank].blank? || amazon_sales_rank_by_book_version_id[warehouse_book_version_id] < details[warehouse_category_id][:best_sales_rank]
          details[warehouse_category_id][:best_sales_rank] = amazon_sales_rank_by_book_version_id[warehouse_book_version_id]
        end

        if details[warehouse_category_id][:worst_sales_rank].blank? || amazon_sales_rank_by_book_version_id[warehouse_book_version_id] > details[warehouse_category_id][:worst_sales_rank]
          details[warehouse_category_id][:worst_sales_rank] = amazon_sales_rank_by_book_version_id[warehouse_book_version_id]
        end
      end if list_stat_values_by_category_id[my_category_id].present?
    end if my_categories.present?

    hash[stat.warehouse_book_version_id] = details
  end
end
-
end
-
1
# A named trend that warehouse list stats can be tagged with.
class WarehouseTrend < ActiveRecord::Base
  attr_accessible :name

  # Associations
  has_many :warehouse_list_stats, inverse_of: :warehouse_trend

  # Validations
  validates :name, presence: true
end
-
1
# Thin wrapper around the Amazon Product Advertising API (via the amazon-ecs
# gem).  Batched key lookups plus per-field extractors that HTML-entity-decode
# the raw response values.
module AmazonApi
  extend self

  # Fetches Amazon items for a batch of +keys+ (:asin or :isbn13 key_type) on
  # the given +tld+.  Returns {key => {lookup: [items], search: [items],
  # status: sym}} where status is one of :invalid_key_type, :invalid_key,
  # :ready_for_amazon_ingestion, :ambiguous_results, :no_results, :throttled
  # or :external_error.  A key matching exactly one item is safe to ingest.
  def get_all_items_by_keys_and_tld(keys, key_type, tld)
    keys = Array.wrap(keys).uniq
    items_by_key = {}

    # Only ASIN and ISBN13 lookups are supported.
    if key_type.to_sym != :asin && key_type.to_sym != :isbn13
      return keys.each_with_object({}) {|key, hash| hash[key] = {lookup: [], search: [], status: :invalid_key_type}}
    end

    # Pre-validate key shape (ASIN = 10 chars, ISBN13 = 13 chars).
    items_by_key = keys.each_with_object({}) do |key, hash|
      hash[key] = {lookup: [], search: [], status: :ready_for_amazon_ingestion}
      hash[key][:status] = :invalid_key if key.blank? || (key_type.to_sym == :asin && key.length != 10) || (key_type.to_sym == :isbn13 && key.length != 13)
    end

    lookup_key_hashes = items_by_key.select {|key, hash| hash[:status] == :ready_for_amazon_ingestion}

    return items_by_key unless lookup_key_hashes.present?

    # First pass: one batched ItemLookup for all still-valid keys.
    Amazon::Ecs.options = AmazeBot.config[:amazon][:amazon_ecs][:options]
    item_lookup_res = Amazon::Ecs.item_lookup(lookup_key_hashes.keys.join(','), AmazeBot.config[:amazon][:amazon_ecs][:item_lookup_options].merge({country: convert_tld_to_country(tld)}))
    match_response_items_by_key!(lookup_key_hashes, item_lookup_res, key_type, :lookup)
    # FIX: was a non-destructive `merge` with the return value discarded (a
    # no-op).  The sub-hashes are the same objects stored in items_by_key, so
    # statuses propagated regardless; merge! makes the intent explicit.
    items_by_key.merge! lookup_key_hashes

    # Second pass: ItemSearch for keys the lookup didn't resolve to exactly one item.
    search_key_hashes = lookup_key_hashes.select {|key, hash| hash[:lookup].length != 1}
    if search_key_hashes.present?
      item_search_res = Amazon::Ecs.item_search(search_key_hashes.keys.join(' | '), AmazeBot.config[:amazon][:amazon_ecs][:item_search_options].merge({country: convert_tld_to_country(tld)}))
      match_response_items_by_key!(search_key_hashes, item_search_res, key_type, :search)

      search_key_hashes.each do |key, hash|
        if hash[:search].length > 1 || (hash[:search].length == 0 && hash[:lookup].length > 1)
          Rails.logger.tagged('book_data') {Rails.logger.info "item_search returned more than 1 result or item_lookup returned more than one result (with no results from item_search) so we wont ingest: #{key}"}
          hash[:status] = :ambiguous_results
        elsif hash[:search].length == 1
          Rails.logger.tagged('book_data') {Rails.logger.info "item_lookup returned no results (or more than 1 result) but item_search returned only 1 result so we will ingest: #{key}"}
        else
          Rails.logger.tagged('book_data') {Rails.logger.info "item_lookup returned no results and item_search returned no results so this book doesn't exist: #{key}"}
          hash[:status] = :no_results
        end
      end

      # FIX: same discarded-merge no-op as above.
      items_by_key.merge! search_key_hashes
    end

    items_by_key
  rescue *HTTP_ERRORS => e
    # Amazon signals throttling with an HTTP 503.
    if e.message.include? "503"
      Rails.logger.tagged('book_data') {Rails.logger.info "The amazon api response request for ISBN #{keys.join(", ")} was throttled #{e.message}"}
      items_by_key.each {|key, hash| hash[:status] = :throttled}
      return items_by_key
    end
    Rails.logger.tagged('book_data') {Rails.logger.info "The amazon api response request for ISBN #{keys.join(", ")} returned an unknown error: #{e.message}"}
    items_by_key.each {|key, hash| hash[:status] = :external_error}
    return items_by_key
  end

  # --- Field extractors ------------------------------------------------------
  # Each reads one element from an Amazon::Ecs response item; values are run
  # through an HTMLEntities decoder (see get_decoder) unless noted.

  def get_asin_from_item(item)
    item.get("ASIN")
  end

  def get_author_from_item(item)
    get_decoder.decode item.get("ItemAttributes/Author")
  end

  def get_binding_from_item(item)
    get_decoder.decode item.get("ItemAttributes/Binding")
  end

  def get_brand_from_item(item)
    get_decoder.decode item.get("ItemAttributes/Brand")
  end

  def get_creator_from_item(item)
    get_decoder.decode item.get("ItemAttributes/Creator")
  end

  def get_ean_from_item(item)
    get_decoder.decode item.get("ItemAttributes/EAN")
  end

  # Single EAN from the EANList (first element only).
  def get_ean_list_element_from_item(item)
    get_decoder.decode item.get("ItemAttributes/EANList/EANListElement")
  end

  # All EANs from the EANList.
  def get_ean_list_elements_from_item(item)
    item.get_array("ItemAttributes/EANList/EANListElement").map {|val| get_decoder.decode val}
  end

  def get_eisbn_from_item(item)
    get_decoder.decode item.get("ItemAttributes/EISBN")
  end

  def get_isbn_from_item(item)
    get_decoder.decode item.get("ItemAttributes/ISBN")
  end

  # Item dimensions: numeric value coerced to Integer, unit read from the
  # element's Units attribute.  nil when the element is absent.
  def get_item_dimensions_height_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/ItemDimensions/Height")).to_i if item.get("ItemAttributes/ItemDimensions/Height").present?
  end

  def get_item_dimensions_height_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/ItemDimensions/Height").attributes["Units"].value if item.get_element("ItemAttributes/ItemDimensions/Height").present?
  end

  def get_item_dimensions_length_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/ItemDimensions/Length")).to_i if item.get("ItemAttributes/ItemDimensions/Length").present?
  end

  def get_item_dimensions_length_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/ItemDimensions/Length").attributes["Units"].value if item.get_element("ItemAttributes/ItemDimensions/Length").present?
  end

  def get_item_dimensions_weight_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/ItemDimensions/Weight")).to_i if item.get("ItemAttributes/ItemDimensions/Weight").present?
  end

  def get_item_dimensions_weight_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/ItemDimensions/Weight").attributes["Units"].value if item.get_element("ItemAttributes/ItemDimensions/Weight").present?
  end

  def get_item_dimensions_width_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/ItemDimensions/Width")).to_i if item.get("ItemAttributes/ItemDimensions/Width").present?
  end

  def get_item_dimensions_width_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/ItemDimensions/Width").attributes["Units"].value if item.get_element("ItemAttributes/ItemDimensions/Width").present?
  end

  # Truncated to 255 chars to fit the column it is persisted to.
  def get_label_from_item(item)
    if item.get("ItemAttributes/Label").present?
      label = item.get("ItemAttributes/Label")
      get_decoder.decode label.first(255)
    end
  end

  # Image URLs are URI-decoded rather than entity-decoded.
  def get_large_image_url_from_item(item)
    URI.decode item.get("LargeImage/URL") if item.get("LargeImage/URL").present?
  end

  def get_medium_image_url_from_item(item)
    URI.decode item.get("MediumImage/URL") if item.get("MediumImage/URL").present?
  end

  def get_small_image_url_from_item(item)
    URI.decode item.get("SmallImage/URL") if item.get("SmallImage/URL").present?
  end

  # List price amount in minor units (cents) as an Integer.
  def get_list_price_amount_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/ListPrice/Amount")).to_i if item.get("ItemAttributes/ListPrice/Amount").present?
  end

  def get_list_price_currency_code_from_item(item)
    get_decoder.decode item.get("ItemAttributes/ListPrice/CurrencyCode")
  end

  def get_manufacturer_from_item(item)
    if item.get("ItemAttributes/Manufacturer").present?
      manufacturer = item.get("ItemAttributes/Manufacturer")
      get_decoder.decode manufacturer.first(255)
    end
  end

  def get_number_of_pages_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/NumberOfPages")).to_i if item.get("ItemAttributes/NumberOfPages").present?
  end

  # Package dimensions: same pattern as item dimensions above.
  def get_package_dimensions_height_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/PackageDimensions/Height")).to_i if item.get("ItemAttributes/PackageDimensions/Height").present?
  end

  def get_package_dimensions_height_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/PackageDimensions/Height").attributes["Units"].value if item.get_element("ItemAttributes/PackageDimensions/Height").present?
  end

  def get_package_dimensions_length_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/PackageDimensions/Length")).to_i if item.get("ItemAttributes/PackageDimensions/Length").present?
  end

  def get_package_dimensions_length_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/PackageDimensions/Length").attributes["Units"].value if item.get_element("ItemAttributes/PackageDimensions/Length").present?
  end

  def get_package_dimensions_weight_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/PackageDimensions/Weight")).to_i if item.get("ItemAttributes/PackageDimensions/Weight").present?
  end

  def get_package_dimensions_weight_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/PackageDimensions/Weight").attributes["Units"].value if item.get_element("ItemAttributes/PackageDimensions/Weight").present?
  end

  def get_package_dimensions_width_from_item(item)
    get_decoder.decode(item.get("ItemAttributes/PackageDimensions/Width")).to_i if item.get("ItemAttributes/PackageDimensions/Width").present?
  end

  def get_package_dimensions_width_unit_from_item(item)
    get_decoder.decode item.get_element("ItemAttributes/PackageDimensions/Width").attributes["Units"].value if item.get_element("ItemAttributes/PackageDimensions/Width").present?
  end

  # Amazon publication dates come in several formats: "YYYY-MM-DD", "YYYY-MM",
  # bare "YYYY", or "Mon YYYY".  Returns a Date, or nil when absent/unparsable.
  def get_publication_date_from_item(item)
    date = item.get("ItemAttributes/PublicationDate")
    if date.present?
      if date.include? "-"
        if date.count("-") > 1
          return Date.strptime(date, "%Y-%m-%d")
        else
          return Date.strptime(date, "%Y-%m")
        end
      elsif date.strip.scan(/\D/).blank?
        return Date.strptime(date, "%Y")
      else
        return Date.strptime(date, "%b %Y")
      end
    end
  rescue ArgumentError
    Rails.logger.tagged('book_data') {Rails.logger.info "Amazon API returned malformed date string, can't parse date"}
    nil
  end

  def get_publisher_from_item(item)
    if item.get("ItemAttributes/Publisher").present?
      publisher = item.get("ItemAttributes/Publisher")
      get_decoder.decode publisher.first(255)
    end
  end

  def get_sales_rank_from_item(item)
    get_decoder.decode item.get("SalesRank")
  end

  def get_studio_from_item(item)
    if item.get("ItemAttributes/Studio").present?
      studio = item.get("ItemAttributes/Studio")
      get_decoder.decode studio.first(255)
    end
  end

  def get_title_from_item(item)
    get_decoder.decode item.get("ItemAttributes/Title")
  end

  # Every ISBN-ish identifier the item exposes (EISBN, EAN, full EAN list).
  def get_all_isbns(item)
    Array.wrap(AmazonApi.get_eisbn_from_item(item)) + Array.wrap(AmazonApi.get_ean_from_item(item)) + AmazonApi.get_ean_list_elements_from_item(item)
  end

  private

  # Memoized HTML entity decoder shared by all extractors.
  def get_decoder
    @decoder ||= HTMLEntities.new
  end

  # Distributes response items into each key's :lookup or :search bucket by
  # matching on ASIN or on any of the item's ISBNs.  Mutates +key_hashes+.
  def match_response_items_by_key!(key_hashes, response, key_type, api_query_type)
    key_hashes.keys.each do |key|
      response.items.each do |item|
        if key_type.to_sym == :asin
          key_hashes[key][api_query_type] << item if AmazonApi.get_asin_from_item(item) == key
        else
          key_hashes[key][api_query_type] << item if AmazonApi.get_all_isbns(item).include? key
        end
      end
    end
  end

  # Maps a marketplace TLD to the amazon-ecs country code.
  def convert_tld_to_country(tld)
    case tld
    when '.com'
      'us'
    when '.co.uk'
      'uk'
    else
      # FIX: was `throw`, which is Ruby's catch/throw control flow, not error
      # signalling; an uncaught throw surfaced as ArgumentError anyway, so
      # raising ArgumentError preserves what callers could have rescued.
      raise ArgumentError, 'Bad TLD/Country conversion'
    end
  end
end
-
1
# Ad-hoc analysis/report helpers run from the console; each *_report method
# writes a CSV into the current working directory.
module DataAnalysis
  # NOTE(review): leftover scratch assignment — this local in the module body
  # is never read by any method below; looks like debugging residue.
  asin = 'B008LQ1A68'

  # CSV report of how often each of +asins+ appeared in other books'
  # "also bought" slots over the last month.
  def self.all_also_bought_appearance_counts_report(asins)
    dates = (Date.current - 1.month)..Date.current
    data = dates.each_with_object({}) do |date, hash|
      counts = DataAnalysis.all_also_bought_appearance_counts(date)
      hash[date] = asins.collect {|asin| counts[asin]}
    end

    csv = CSV.open('./also_bought_appearances.csv', 'wb')
    csv << ['Date', 'Total Books', 'Total Books With Appearances', 'Percent Books with Appearances', 'Mean Appearance Count', 'Median Appearance Count', 'Mode Appearance Count']
    dates.each do |date|
      csv << [date, asins.count, data[date].compact.count, "#{'%.2f' % (data[date].compact.count.to_f / asins.count * 100.0)}%",
              data[date].compact.mean, data[date].compact.median, data[date].compact.mode]
    end
    csv.close
  end

  # asin => total number of "also bought" slots (1..6) it occupied across all
  # .com warehouse stats on +date+.
  def self.all_also_bought_appearance_counts(date)
    warehouse_date_id = WarehouseDate.find_by(date: date).id
    # One GROUP BY count per also-bought slot column.
    counts = (0..5).collect do |num|
      WarehouseStat.where(warehouse_date_id: warehouse_date_id, warehouse_region_id: WarehouseRegion.com.id).group(:"amazon_also_bought_asin_#{num + 1}").count
    end
    # Fold the six per-slot tallies into a single asin => count hash.
    total_counts = counts[0]
    (1..5).each do |num|
      counts[num].each_pair {|asin_key, count| total_counts[asin_key] = total_counts.fetch(asin_key, 0) + count}
    end

    total_counts
  end

  # Number of .com stats on +date+ listing +asin+ in any also-bought slot.
  # (Squeel block syntax: | is SQL OR across the six columns.)
  def self.also_bought_appearances(asin, date)
    warehouse_date_id = WarehouseDate.find_by(date: date).id

    WarehouseStat.where{(amazon_also_bought_asin_1 == asin) |
                        (amazon_also_bought_asin_2 == asin) |
                        (amazon_also_bought_asin_3 == asin) |
                        (amazon_also_bought_asin_4 == asin) |
                        (amazon_also_bought_asin_5 == asin) |
                        (amazon_also_bought_asin_6 == asin)}.where(warehouse_date_id: warehouse_date_id, warehouse_region_id: WarehouseRegion.com.id).count
  end

  # dates = (Date.current - 1.week)..Date.current
  # CSV report of how many top-100 lists each ASIN *newly* entered per day
  # (present on a list on day N but absent on day N-1); the range is extended
  # one day back so the first requested day has a baseline.
  def self.dates_entered_top_100_lists_report(asins, start_date, end_date)
    warehouse_dates_by_id = WarehouseDate.where(date: (start_date - 1.day)..end_date).each_with_object({}) do |warehouse_date, hash|
      hash[warehouse_date.id] = warehouse_date
    end
    warehouse_dates = warehouse_dates_by_id.values
    # asin => {warehouse_category_id => [dates the asin appeared on that list]}
    data_by_asins = {}
    warehouse_dates.each do |warehouse_date|
      WarehouseListStat.where(warehouse_date_id: warehouse_date.id).where(asin: asins).each do |list_stat|
        data_by_asins[list_stat.asin] ||= {}
        data_by_asins[list_stat.asin][list_stat.warehouse_category_id] ||= []
        data_by_asins[list_stat.asin][list_stat.warehouse_category_id] << warehouse_dates_by_id[list_stat.warehouse_date_id].date
      end
    end

    warehouse_categories_by_id = WarehouseCategory.where(id: data_by_asins.values.collect(&:keys).flatten.uniq).each_with_object({}) do |category, hash|
      hash[category.id] = category
    end

    dates_entered = {}
    report = warehouse_dates.each_with_object({}) {|warehouse_date, hash| hash[warehouse_date.date] = {}}
    data_by_asins.each_pair do |asin, date_lists_by_warehouse_category_id|
      dates_entered[asin] ||= {}
      date_lists_by_warehouse_category_id.each_pair do |warehouse_category_id, date_list|
        dates = []
        # Walk days pairwise: warehouse_dates[1..-1][i] vs warehouse_dates[i]
        # compares each day against the previous one.
        warehouse_dates[1..-1].each_with_index do |warehouse_date, index|
          if date_list.present? && date_list.include?(warehouse_date.date) && date_list.exclude?(warehouse_dates[index].date)
            dates << warehouse_date.date
            report[warehouse_date.date][asin] ||= 0
            report[warehouse_date.date][asin] += 1
          end
        end

        dates_entered[asin][warehouse_categories_by_id[warehouse_category_id]] = dates
      end
    end

    csv = CSV.open('./date_entered_top_100s.csv', 'wb')
    csv << ['Date', 'Total Books', 'Total Books Newly Entering Top 100s', 'Percent Books That Newly Entered Lists',
            'Mean # of Lists Newly Entered', 'Median # of Lists Newly Entered', 'Mode # of Lists Newly Entered']
    report.each_pair do |date, hash|
      csv << [date, asins.count, report[date].values.sum, "#{'%.2f' % (report[date].values.sum.to_f / asins.count * 100.0)}%",
              report[date].values.mean, report[date].values.median, report[date].values.mode]
    end
    csv.close
  end

  # date => delta in amazon_review_count versus the previous day's stat for
  # the .com book version with +asin+.  Missing counts are treated as 0.
  def self.number_of_new_amazon_reviews(asin, start_date, end_date)
    book_version = WarehouseBookVersion.find_by(asin: asin, tld: '.com')
    # Range starts one day early to provide a baseline for start_date.
    warehouse_date_ids = WarehouseDate.where(date: (start_date - 1.day)..end_date).value_of(:id)
    stats = book_version.warehouse_stats.includes(:warehouse_date).where(warehouse_date_id: warehouse_date_ids).joins(:warehouse_date).order('warehouse_dates.date ASC')

    if stats.present?
      # stats[1..-1] with each_with_index pairs every stat with its
      # predecessor (index i in the slice maps to stats[i], the day before).
      stats[1..-1].each_with_index.each_with_object({}) do |stat_and_index, hash|
        current_count = stat_and_index[0].amazon_review_count || 0
        day_before_count = stats[stat_and_index[1]].amazon_review_count || 0
        hash[stat_and_index[0].warehouse_date.date] = (current_count) - (day_before_count)
      end
    else
      {}
    end
  end

  # CSV report summarizing, per day, how many of +asins+ gained reviews and
  # the mean/median/mode of the positive review deltas.
  def self.number_of_new_amazon_reviews_report(asins, start_date, end_date)
    dates = start_date..end_date
    data = asins.each_with_object({}) do |asin, hash|
      review_counts = DataAnalysis.number_of_new_amazon_reviews(asin, dates.first, dates.last)
      review_counts.each_pair do |date, count|
        hash[date] ||= []
        hash[date] << count
      end
    end

    csv = CSV.open('./new_amazon_reviews.csv', 'wb')
    csv << ['Date', 'Total Books', 'Total Books With New Reviews', 'Average # of Books with New Reviews', 'Mean New Review Count', 'Median New Review Count', 'Mode New Review Count']
    dates.each do |date|
      csv << [date, asins.count, data[date].compact.count {|x| x > 0}, "#{'%.2f' % (data[date].compact.count {|x| x > 0}.to_f / asins.count * 100.0)}%",
              data[date].compact.select {|x| x > 0}.mean, data[date].compact.select {|x| x > 0}.median, data[date].compact.select {|x| x > 0}.mode]
    end
    csv.close
  end
end
-
1
# One-off cleanup helpers for repairing book version identifier data.
module DataCleanup
  extend self

  # Backfills a missing ISBN13 on +book_version+ from its stored Amazon API
  # response.  If the resolved ISBN13 already belongs to another book version
  # on the same TLD, the record is flagged as a duplicate instead of updated.
  def populate_missing_isbn13_from_api(book_version)
    if book_version.isbn13.blank? && book_version.update_amazon_api_response == :ready_for_amazon_ingestion
      item = book_version.amazon_api_response.matching_response_item
      # matching_response_item returns a Symbol on failure, an item on success.
      if item.class != Symbol && item.isbn13.present?
        book_version.isbn13 = item.isbn13
        if WarehouseBookVersion.where(isbn13: book_version.isbn13, tld: book_version.tld).exists?
          Rails.logger.tagged('cleanup') {Rails.logger.info "BookVersion #{book_version.id} ASIN: #{book_version.asin} matches existing isbn13: #{book_version.isbn13}"}
          book_version.duplicate_key = book_version.isbn13
          book_version.isbn13 = nil
          book_version.status = :duplicate_isbn13
        else
          Rails.logger.tagged('cleanup') {Rails.logger.info "Updated BookVersion #{book_version.id} ASIN: #{book_version.asin} to isbn13: #{book_version.isbn13}"}
        end
        book_version.save
      else
        Rails.logger.tagged('cleanup') {Rails.logger.info "BookVersion #{book_version.id} ASIN: #{book_version.asin} found no isbn13. Self published?"}
      end
    end
  end

  # TODO: Needs to be fixed, author_name = book_version.title ahahahahahahahahaha
  #def find_ean_from_asin(book_version)
  #  # If book version has no isbn13 or bn_id, proceed to matching to convert ASIN -> EAN (which is either an ISBN13 or BN ID)
  #  ean = nil
  #  book_format = book_version.book_format
  #  title = book_version.title
  #  author_name = book_version.title
  #
  #  # If book_version has type, title and author_name proceed
  #  if book_format.present? && title.present? && author_name.present?
  #    amazon_page = AmazonProductPage.new book_version.amazon_url
  #    ean = amazon_page.scrape_isbn_13
  #
  #    # If format isn't of these 3 types, our only option is to find the isbn13 on the page so return it immediately
  #    return ean if book_format.exclude?('Hardcover') && book_format.exclude?('Paperback') && book_format.exclude?('Kindle')
  #
  #    #TODO: consider switching order of the next 2 methods if one proves to be more reliable than the other
  #    # check if any BN search results show same pub date as book version
  #    if book_version.pub_date.present? && ean.blank?
  #      bn_search_page = BnSearchPage.by_title_and_author_and_book_format book_version.title, book_version.author_name, book_version.book_format
  #      ean = bn_search_page.ean_for book_version.pub_date
  #    end
  #
  #    # if no pub date match exists go through amazon physical method and get isbn13 for a physical version
  #    # only do this when trying to find a matching Nook for a Kindle title
  #    #TODO: clean this up to work for all book formats if it seems promising
  #    ean = DataCleanup.find_isbn13_by_amazon_physical_method amazon_page if ean.blank? && book_format.include?('Kindle')
  #  else
  #    Rails.logger.tagged('cleanup') {Rails.logger.info "Book Version #{book_version.id} is missing data (book_format, book, or book author) and matching can't proceed"}
  #  end
  #
  #  ean
  #end

  # Scrapes physical-edition ISBN13s from the given Amazon product page and
  # probes BN for each one, returning the first Nook EAN found, or nil.
  def find_isbn13_by_amazon_physical_method(page)
    isbn13s = page.scrape_physical_isbn13s

    isbn13s.each do |isbn13|
      bn_page = BnBookPage.by_ean(isbn13)
      if bn_page.ok?
        ean = bn_page.scrape_nook_ean
        return ean if ean.present?
      end
    end
    # BUG FIX: previously fell through returning the (truthy, non-empty)
    # isbn13s array from #each, which callers checking ean.blank? would
    # mistake for a found EAN.  Return nil explicitly when nothing matched.
    nil
  end
end
-
1
require 'csv'
-
1
require 'net/ftp'
-
1
require 'zip'
-
-
1
module EnterpriseReports
-
1
INVALID_CHARS_MAPPING = {"¨" => ' ', "ʺ" => '""', "˝" => '""', "ˮ" => '""', "Ҍ" => ' ', "ҍ" => ' ', "״" => '""', "“" => '""', "”" => '""', "‟" => '""', "″" => '""', "⠐" => ' ', "〃" => ' ', "々" => ' ', "ゝ" => ' ', "ゞ" => ' ', "ヽ" => ' ', "ヾ" => ' '}
-
1
extend self
-
-
1
# Runs +sql+ through sql_copy_to_csv, optionally gzips, uploads to S3, and —
# when the produced row count is within 5% of +expected_count+ (or no
# expectation was given) — FTPs and/or emails the report.  Otherwise an alert
# email is sent and delivery is withheld.
# NOTE(review): report_class is not defined in this file — presumably supplied
# by the including report class; confirm.
def sql_copy_to_csv_and_deliver_report(sql, client_name, base_filename, expected_count, ftp: false, emailable_report_name: nil, gzip: false)
  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{client_name}) Report Started -----"}

  csv = sql_copy_to_csv("#{client_name}-#{emailable_report_name}",
                        get_report_location(client_name, get_filename(base_filename)),
                        WarehouseStat.connection.raw_connection,
                        sql)


  csv = gzip_report(csv, client_name) if gzip
  move_to_s3(client_name, csv)

  # Determine if report sending is valid based on counts then ftp/set redis/email
  # (sql_copy_to_csv published the actual row count into this redis hash).
  row_count = $redis.hgetall('daily_report_stats')["#{client_name}-#{emailable_report_name}-row-count"].to_f
  if expected_count.nil? || EnterpriseReports.report_count_valid?(row_count, expected_count)
    ftp_to_client(client_name, csv) if ftp
    EnterpriseReportsMailer.basic_report(generate_report_hash(base_filename, client_name, gzip), get_report_email_details(client_name, emailable_report_name)).deliver if emailable_report_name.present?
    Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{client_name}) Report Delivered -----"}
  else
    EnterpriseReports.send_report_count_error base_filename, row_count, expected_count
    Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{client_name}) Report NOT Delivered: row count off by 5% of more -----"}
  end
end
-
-
1
# True when the actual row count is within ±5% of the expected count.
# NOTE(review): an expected_count of 0 yields NaN, which compares false —
# i.e. zero-expectation reports are always considered invalid; confirm intended.
def report_count_valid?(row_count, expected_count)
  relative_delta = (expected_count - row_count) / expected_count.to_f
  relative_delta.abs < 0.05
end
-
-
1
# Emails an alert that a report was withheld because its row count deviated
# too far from the expected count.
def send_report_count_error(report_identifier, row_count, expected_count)
  message = "#{report_identifier} - Report blocked from sending and FTPing because todays count: #{row_count} was too far off the expected count: #{expected_count}"
  NotificationMailer.report_row_count_error(report_identifier, message).deliver
end
-
-
1
# Appends the .csv extension to a report's base filename.
def get_filename(base_filename)
  format('%s.csv', base_filename)
end
-
-
1
# Full path for a client's report file under the environment-specific
# reports root directory.
def get_report_location(client_name, filename)
  reports_root = AmazeBot.config[:reports][:location][Utilities.env]
  File.join(reports_root, client_name.to_s, filename)
end
-
-
1
# Per-client, per-report email settings from app config, wrapped with
# indifferent access so callers can use string or symbol keys.
def get_report_email_details(client_name, report_name)
  AmazeBot.config[:reports][:clients][client_name][:reports][report_name].with_indifferent_access
end
-
-
1
# Assembles the metadata hash that mailers/uploaders use to locate a report
# (adds a .gz suffix to the filename when the report was gzipped).
def generate_report_hash(base_filename, client_name, gzip = false)
  filename = get_filename(base_filename)
  filename = "#{filename}.gz" if gzip
  {
    base_filename: base_filename,
    filename: filename,
    report_location: get_report_location(client_name, filename),
    report_format: 'csv',
    client_name: client_name
  }
end
-
-
1
# Uploads +file+ to S3 via CarrierWave under the client's folder (plus any
# +extra_folders+) and returns the resulting public S3 URL.
def move_to_s3(client_name, file, extra_folders = [])
  uploader = ReportUploader.new
  uploader.client_name = client_name
  uploader.add_folders_to_store_dir(extra_folders) if extra_folders.present?
  # Ensure buffered writes hit disk before CarrierWave reads the file.
  file.flush
  uploader.store! file

  File.join 'https://s3.amazonaws.com', AmazeBot.config[:carrier_wave][:fog_directory][Utilities.env], uploader.store_dir, File.basename(file.path)
end
-
-
1
# Streams the result of a Postgres COPY ... TO STDOUT (CSV with header, all
# fields force-quoted) into the file at +report_location+, sanitizing each row
# for the downstream consumer.  Progress is published to the
# 'daily_report_stats' redis hash so long exports can be monitored.
# Returns the open (flushed, not closed) File handle.
def sql_copy_to_csv(report_key, report_location, raw_connection, sql)
  dirname = File.dirname report_location
  begin
    # FIX: File.exists? is a deprecated alias (removed in Ruby 3.2); use File.exist?.
    Dir.mkdir dirname unless File.exist? dirname
    csv = File.open report_location, 'wb'
  rescue Errno::EEXIST
    # open the file as normal if the dir already exists. This means another process on the same dyno
    # created it already for this user
    csv = File.open report_location, 'wb'
  end

  row_count = 0
  $redis.hmset('daily_report_stats', "#{report_key}-row-count", row_count, "#{report_key}-send-time", "Not Sent Yet")

  raw_connection.exec("COPY (#{sql}) TO STDOUT WITH CSV HEADER FORCE QUOTE *;")
  while !(data = raw_connection.get_copy_data).nil?
    row_count += 1

    # coerce data to ASCII for RH and remove newlines (and duplicate newlines) except for last one (this regex specifically ignores last /n)
    # also remove Windows carriage returns (\r) and escaped newlines (\\n) in case RH has any issues processing those as well
    # also manually handle non-unicode quote characters by coercing them to escaped quotes and anything else that
    # maps to a quote character to a space.
    csv << data.to_ascii(INVALID_CHARS_MAPPING).gsub(/\\n+|\r+|\n+(?!$)/, '')

    # Heartbeat every 10k rows.
    if row_count % 10000 == 0
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{report_key}) Copy to CSV wrote: #{row_count} lines -----"}
      $redis.hmset('daily_report_stats', "#{report_key}-row-count", row_count, "#{report_key}-send-time", "Not Sent Yet")
    end
  end

  csv.flush
  $redis.hmset('daily_report_stats', "#{report_key}-row-count", row_count, "#{report_key}-send-time", Time.current.to_s)
  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{report_key}) Copy to CSV completed: #{row_count} lines-----"}

  csv
end
-
-
1
# Opens the report's CSV file for writing, creating its parent directory if
# needed.  Returns the open CSV handle (caller is responsible for closing).
def open_csv(report_hash)
  dirname = File.dirname report_hash[:report_location]
  begin
    # FIX: File.exists? is a deprecated alias (removed in Ruby 3.2); use File.exist?.
    Dir.mkdir dirname unless File.exist? dirname
    CSV.open(report_hash[:report_location], 'wb')
  rescue Errno::EEXIST
    # open the file as normal if the dir already exists. This means another process on the same dyno
    # created it already for this user
    CSV.open(report_hash[:report_location], 'wb')
  end
end
-
-
1
# Extracts every entry of the zip archive +file+ into +destination+,
# creating intermediate directories as needed; existing files are skipped.
def unzip_file(file, destination)
  Zip::File.open(file) do |archive|
    archive.each do |entry|
      target_path = File.join(destination, entry.name)
      FileUtils.mkdir_p(File.dirname(target_path))
      archive.extract(entry, target_path) unless File.exist?(target_path)
    end
  end
end
-
-
1
# FTPs the finished CSV (then an ALLFILES.DONE completion marker) to the
# client's FTP server.  Production only; currently only :rhinc has a
# configured destination.
# SECURITY(review): FTP credentials are hard-coded and sent in the clear
# (plain FTP); move them to config/ENV and prefer FTPS/SFTP.
def ftp_to_client(client_name, csv)
  if Rails.env.production?
    if client_name.to_sym == :rhinc
      # FTP CSV File
      Net::FTP.open('ftp.randomhouse.com', 'booklr', 'B00Klr') do |ftp|
        ftp.passive = true
        ftp.chdir 'to_rh'
        ftp.putbinaryfile csv.path
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report Delivered to RH FTP Server (#{csv.path})-----"}
      end

      # FTP Completion file
      file = File.open('/tmp/ALLFILES.DONE', 'w')
      Net::FTP.open('ftp.randomhouse.com', 'booklr', 'B00Klr') do |ftp|
        ftp.passive = true
        ftp.chdir 'to_rh'
        ftp.putbinaryfile file.path
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report Delivered to RH FTP Server (#{file.path})-----"}
      end
      # BUG FIX: close the handle before deleting the file; the original
      # deleted first and then closed a handle to an already-unlinked file.
      file.close
      File.delete(file.path)
    end
  end
end
-
-
1
# FTPs the file at +report_location+ to the Random House 'to_rh' drop folder.
# Production only.
# SECURITY(review): credentials are hard-coded and sent over plain FTP — same
# issue as ftp_to_client; move to config/ENV.
def ftp_to_rhpg(report_location)
  if Rails.env.production?
    Net::FTP.open('ftp.randomhouse.com', 'booklr', 'B00Klr') do |ftp|
      ftp.passive = true
      ftp.chdir 'to_rh'
      ftp.putbinaryfile report_location
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report Delivered to RH FTP Server (#{report_location})-----"}
    end
  end
end
-
-
1
# Collects the values of all stat_hash keys containing +type_string+, in key
# order. Keys whose (downcased) name mentions "price" are converted from
# integer cents to a float dollar amount, or nil when the value is blank.
def output_customer_behavior_fields(stat_hash, type_string)
  matching_keys = stat_hash.keys.select { |key| key.include? type_string }
  matching_keys.collect do |key|
    value = stat_hash[key]
    if key.downcase.include?("price")
      value.present? ? (value.to_i / 100.0) : nil
    else
      value
    end
  end
end
-
-
1
def output_field_and_change_and_percentage_change(stat, last_week_stat, field, float=false)
-
13
value = float == true ? stat[field].to_f : stat[field].to_i if stat.has_key?(field) && stat[field].present?
-
13
last_value = float == true ? last_week_stat[field].to_f : last_week_stat[field].to_i if last_week_stat.has_key?(field) && last_week_stat[field].present?
-
-
13
if value.present? && last_value.present?
-
8
change = value - last_value
-
8
percent_change = percent_change_from(last_value, value)
-
8
return [value, change, percent_change]
-
end
-
5
[value, nil, nil]
-
end
-
-
1
# Serializes +obj+ via the given block and left-pads the result with nils so
# it is exactly +size+ elements long; a blank obj yields all nils.
def prepend_serialized_data(obj, size)
  serialized = obj.present? ? yield(obj) : Array.new(size)
  shortfall = size - serialized.size
  return serialized unless shortfall > 0
  Array.new(shortfall) + serialized
end
-
-
1
# Serializes +obj+ via the given block and right-pads the result with nils so
# it is exactly +size+ elements long; a blank obj yields all nils.
def pad_serialized_data(obj, size)
  serialized = obj.present? ? yield(obj) : Array.new(size)
  missing = size - serialized.size
  missing > 0 ? serialized + Array.new(missing) : serialized
end
-
-
1
# Wraps an ISBN as ="<isbn>" so spreadsheet apps treat it as text and do not
# strip leading zeros or render it in scientific notation.
def isbn_output(isbn)
  format('="%s"', isbn)
end
-
-
1
# Percent change for rank-style metrics, where a drop in value is an
# improvement: (old - new) * 100 / new, rounded to 2 places. Returns nil
# when either value is missing or new_val is zero.
def percent_change_from_for_rank(old_val, new_val)
  return nil unless old_val.present? && new_val.present? && new_val != 0

  scaled_delta = (old_val - new_val) * 100.0
  (scaled_delta / new_val).round(2)
end
-
-
1
# Standard percent change: (new - old) * 100 / old, rounded to 2 places.
# Returns nil when either value is blank or old_val is zero, and 0.0 when
# the values are equal.
def percent_change_from(old_val, new_val)
  return nil if old_val.blank? || new_val.blank?
  return 0.0 if old_val == new_val

  unless old_val == 0
    growth = (new_val - old_val) * 100.0
    (growth / old_val).round(2)
  end
end
-
-
1
# Builds one [title, author, key] row for the RHPG exception report.
# Prefers the ingested warehouse metadata; falls back to the tracked book
# version's stored metadata (which may be a raw quoted String in legacy rows
# or an Array); finally falls back to 'No data' placeholders. The third
# column is always the book's isbn13-or-asin key.
def get_rhpg_exception_report_row(tracked_book_version)
  if tracked_book_version.warehouse_book_version.title.present? && tracked_book_version.warehouse_book_version.author_name.present?
    [tracked_book_version.warehouse_book_version.title, tracked_book_version.warehouse_book_version.author_name,
     tracked_book_version.warehouse_book_version.isbn_or_asin]
  elsif tracked_book_version.metadata.present?
    if tracked_book_version.metadata.class == String
      # Legacy rows store metadata as one quoted string: strip the outer
      # quotes, then split on the "," delimiters to recover the fields.
      metadata_array = tracked_book_version.metadata.gsub(/^"|"$/, '').split('","')
      metadata_array.count > 1 ? [metadata_array[0], metadata_array[1], tracked_book_version.warehouse_book_version.isbn_or_asin] : ['No data', 'No data', tracked_book_version.warehouse_book_version.isbn_or_asin]
    else
      metadata_array = tracked_book_version.metadata
      metadata_array.count > 1 ? [metadata_array[0], metadata_array[1], tracked_book_version.warehouse_book_version.isbn_or_asin] : ['No data', 'No data', tracked_book_version.warehouse_book_version.isbn_or_asin]
    end
  else
    ['No data', 'No data', tracked_book_version.warehouse_book_version.isbn_or_asin]
  end
end
-
-
1
# Builds one RHINC exception-report row: title, author, isbn-or-asin key,
# asin, format, division code, exception timestamp, then one 'X'/'' column
# per boolean exception flag (see report_boolean_output).
def get_rhinc_exception_report_row(tracked_book_version, book_version_exception)
  [tracked_book_version.get_book_title, tracked_book_version.get_author_name,
   tracked_book_version.warehouse_book_version.isbn_or_asin, tracked_book_version.get_book_version_asin,
   tracked_book_version.get_book_format, tracked_book_version.get_division_code,
   book_version_exception.created_at, report_boolean_output(book_version_exception.amazon_not_found_in_search),
   report_boolean_output(book_version_exception.amazon_ambiguous_result),
   report_boolean_output(book_version_exception.amazon_no_buy_button),
   report_boolean_output(book_version_exception.amazon_no_price),
   report_boolean_output(book_version_exception.bn_not_found_in_search),
   report_boolean_output(book_version_exception.apple_invalid)]
end
-
-
1
# Renders a boolean flag for CSV output: 'X' when truthy, blank otherwise.
def report_boolean_output(field)
  if field
    'X'
  else
    ''
  end
end
-
-
1
# Gzips the finished report in place via the system gzip binary and returns
# an open File handle on the resulting .gz file.
#
# NOTE(review): the backtick call neither shell-escapes csv.path nor checks
# $? for failure — a path with spaces or a gzip error goes unnoticed.
def gzip_report(csv, client_name)
  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{client_name}) starting GZIP process -----"}
  `gzip #{csv.path}`
  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_class}(#{client_name}) GZIP Finished -----"}

  # gzip replaces the original file with <path>.gz, so reopen under that name.
  File.open("#{csv.path}.gz")
end
-
-
1
# Name of the concrete reporter for log tagging. When these helpers are
# extended onto a class, self is that Class and its own name is used;
# otherwise the instance's class name is used.
def report_class
  target = self.class == Class ? self : self.class
  target.to_s
end
-
end
-
1
module Formulas
  # Compound average growth rate between two values over +period_length+
  # periods. With growth: :positive the ratio is end/beginning; any other
  # growth direction inverts the ratio. Returns nil when either value is
  # missing or the period is not positive.
  def self.average_growth_rate(beginning_value, end_value, period_length, growth = :positive)
    return unless beginning_value.present? && end_value.present? && period_length > 0

    ratio =
      if growth == :positive
        end_value.to_f / beginning_value.to_f
      else
        beginning_value.to_f / end_value.to_f
      end

    ratio ** (1.0 / period_length.to_f) - 1.0
  end
end
-
1
# Scheduling helpers that feed validated WarehouseBookVersions into the
# Amazon product-API ingestion pipeline via Sidekiq.
module IngestionQueue
  BATCH_SIZE = 10

  # Enqueues up to +number_of_api_calls+ AmazonApiWorkers::GetManyApiResponses
  # jobs, each carrying one batch of book version ids for a single tld and
  # key column (isbn13 batches are tried before asin batches per tld).
  # Returns the number of jobs actually queued.
  def self.queue_book_versions(number_of_api_calls)
    # Queue up all orphaned ingestions
    IngestionQueue.handle_orphaned_book_versions

    # In order of priority cascade the number_of_api_calls down each method till you run out of ingestions for that block
    count = 0
    # Squeel query over validated book versions that have some key; value_of
    # yields [tld, id, isbn13] triples which are folded into
    # {tld => {isbn13: [ids], asin: [ids]}}.
    grouped_ids = WarehouseBookVersion.where(status: %w[validated validated_from_top_100s], tld: Utilities::TLDS).where{(warehouse_book_versions.asin != nil) | (warehouse_book_versions.isbn13 != nil)}.order(:tld, :isbn13).limit(number_of_api_calls * AmazonApiWorkers::GetManyApiResponses::BATCH_SIZE).value_of(:tld, :id, :isbn13).each_with_object({}) do |warehouse_book_version_values, hash|
      hash[warehouse_book_version_values[0]] ||= {}
      if warehouse_book_version_values[2].present?
        hash[warehouse_book_version_values[0]][:isbn13] ||= []
        hash[warehouse_book_version_values[0]][:isbn13] << warehouse_book_version_values[1]
      else
        hash[warehouse_book_version_values[0]][:asin] ||= []
        hash[warehouse_book_version_values[0]][:asin] << warehouse_book_version_values[1]
      end
    end

    Utilities::TLDS.each do |tld|
      [:isbn13, :asin].each do |key_column|
        if count < number_of_api_calls && grouped_ids[tld].present? && grouped_ids[tld][key_column].present?
          grouped_ids[tld][key_column].each_slice(AmazonApiWorkers::GetManyApiResponses::BATCH_SIZE) do |warehouse_book_version_ids|
            timestamp = Time.current
            # Mark the batch in-flight before queueing so a later pass will
            # not pick the same rows up again.
            WarehouseBookVersion.where(id: warehouse_book_version_ids).update_all(status: :api_call_in_progress, updated_at: timestamp)
            AmazonApiWorkers::GetManyApiResponses.perform_async warehouse_book_version_ids, key_column, tld
            count += 1
            break if count == number_of_api_calls
          end
        end
      end
    end

    Rails.logger.tagged('ingestions') {Rails.logger.info "Completed queuing #{count} book version ingestions"} if count > 0
    count
  end

  # Rescues book versions stuck in intermediate states for over 10 minutes:
  # re-validates 'new', re-ingests 'ready_for_amazon_ingestion', and resets
  # 'api_call_in_progress' back to 'validated' so they get re-queued.
  def self.handle_orphaned_book_versions
    # Try and validate all books stuck in new state
    WarehouseBookVersion.where{(status == 'new') & (updated_at < Time.current - 10.minutes)}.order(:id).value_of(:id).each do |warehouse_book_version_id|
      BookVersionValidationWorkers::ValidateNewBookVersion.perform_async warehouse_book_version_id
    end

    # Ingest all books ready for ingestion since API calls are already done
    WarehouseBookVersion.where{(status == 'ready_for_amazon_ingestion') & (updated_at < Time.current - 10.minutes)}.order(:id).value_of(:id).each do |warehouse_book_version_id|
      BookVersionWorkers::Ingest.perform_async warehouse_book_version_id
    end

    # Set all api_call_in_progress status books to validated if they get stuck in that state
    WarehouseBookVersion.where{(status == 'api_call_in_progress') & (updated_at < Time.current - 10.minutes)}.update_all(status: "validated", updated_at: Time.current)
  end

  # Number of validated book versions with a usable key awaiting ingestion.
  def self.ingestions_available
    WarehouseBookVersion.where(status: 'validated').where{(warehouse_book_versions.isbn13 != nil) | (warehouse_book_versions.asin != nil)}.count
  end
end
-
1
# Thin wrapper around the public iTunes lookup API for ebook metadata.
module ItunesApi
  # Looks an ISBN13 up in the US, GB and AU stores (in that order) and
  # returns {:itunes_id, :itunes_pub_date, :itunes_genres} from the first
  # store with a match; an empty hash when nothing matches anywhere.
  def self.get_metadata_by_isbn13(isbn13)
    metadata = {}
    base_url = "http://itunes.apple.com/lookup?isbn=#{isbn13}"

    %w[us gb au].each do |region|
      url = region == "us" ? base_url : base_url + "&country=#{region}"
      result = HttpHelper.get_json(url)
      if result.present? && result['resultCount'] > 0
        data = result['results'].first
        metadata[:itunes_id] = data['trackId'].to_s
        metadata[:itunes_pub_date] = ScraperUtilities.parse_date_string(data['releaseDate'])
        # Genre list is joined and truncated to fit a 255-char column.
        metadata[:itunes_genres] = data['genres'].join(', ').first(255)
        break
      end
    end

    metadata
  end

  # US-store price and rating data for an iTunes track id; empty hash when
  # the lookup returns no results.
  def self.get_itunes_data(itunes_id)
    results = HttpHelper.get_json("http://itunes.apple.com/lookup?id=#{itunes_id}")
    data = HashWithIndifferentAccess.new
    if results.present? && results['resultCount'] > 0
      result = results['results'].first
      # NOTE(review): stripping the '.' converts e.g. "9.99" to 999 cents,
      # but a price rendered as "9.9" would become 99 rather than 990 —
      # confirm the API always emits two decimal places.
      data[:itunes_price] = result['price'].to_s.gsub('.','')
      data[:itunes_average_rating] = result['averageUserRating']
      data[:itunes_rating_count] = result['userRatingCount']
    end

    data
  end

  # GB and AU price/rating data, keyed itunes_<region>_price,
  # itunes_<region>_average_rating and itunes_<region>_rating_count.
  # Regions with no result are simply absent from the returned hash.
  def self.get_regional_itunes_data(itunes_id)
    %w[gb au].each_with_object(HashWithIndifferentAccess.new) do |region, results|
      result = HttpHelper.get_json("http://itunes.apple.com/lookup?id=#{itunes_id}&country=#{region}")
      if result.present? && result['resultCount'] > 0
        data = result['results'].first
        # Same decimal-stripping caveat as get_itunes_data above.
        results["itunes_#{region}_price"] = data['price'].to_s.gsub('.','')
        results["itunes_#{region}_average_rating"] = data['averageUserRating']
        results["itunes_#{region}_rating_count"] = data['userRatingCount']
      end
    end
  end
end
-
1
# Sidekiq scheduler workers that fan bestseller-list scraping out into
# per-list / per-category jobs, all writing into that day's Mongo collection.
module ListStatWorkers
  # Splits the canonical amazon .com category set into id-range blocks and
  # queues one QueueAmazonTop100s job per block.
  class QueueAmazonTop100sBlock
    include Sidekiq::Worker
    sidekiq_options queue: :scheduling

    def perform(block_size = 2000)
      collection_name = MongoUtilities.daily_collection_name(:amazon_list_stats)
      # NOTE(review): `.amazon.com.canonical` chains named scopes — `.com`
      # presumably filters to the .com tld; verify against WarehouseCategory.
      params = WarehouseCategory.amazon.com.canonical.order(:id).value_of(:id).each_slice(block_size).collect do |id_slice|
        [id_slice.first, id_slice.last, collection_name]
      end
      Sidekiq::Client.push_bulk('class' => ListStatWorkers::QueueAmazonTop100s, 'args' => params)
    end
  end

  # Queues one MongoListStatWorkers::AmazonTop100 job per canonical category
  # within the given id range.
  class QueueAmazonTop100s
    include Sidekiq::Worker
    sidekiq_options queue: :scheduling

    def perform(starting_warehouse_category_id, ending_warehouse_category_id, collection_name)
      # NOTE(review): the literal 1 in the args looks like a starting
      # page/position — confirm against MongoListStatWorkers::AmazonTop100.
      params = WarehouseCategory.amazon.com.canonical.where(id: starting_warehouse_category_id..ending_warehouse_category_id).order(:id).value_of(:category_id, :tld, :name, :id).collect do |category_id, tld, name, id|
        [category_id, tld, ScraperUtilities.base_category_from_category_name(name), 1, id, collection_name]
      end
      Sidekiq::Client.push_bulk('class' => MongoListStatWorkers::AmazonTop100, 'args' => params)
    end
  end

  # Queues one Barnes & Noble top-100 scrape per configured list name.
  class QueueBarnesNobleTop100Scrape
    include Sidekiq::Worker
    sidekiq_options queue: :scheduling

    def perform
      collection_name = MongoUtilities.daily_collection_name(:bn_list_stats)
      params = MongoListStatWorkers::BarnesNobleTop100::LIST_NAMES.values.collect do |list_name|
        [list_name, collection_name]
      end
      Sidekiq::Client.push_bulk('class' => MongoListStatWorkers::BarnesNobleTop100, 'args' => params)
    end
  end

  # Queues a paid and a free Apple top-books feed job for every Apple category.
  class QueueAppleTopBooksScrape
    include Sidekiq::Worker
    sidekiq_options queue: :scheduling

    def perform
      collection_name = MongoUtilities.daily_collection_name(:apple_list_stats)
      values = WarehouseCategory.apple.order(:id).value_of(:category_id, :id).each_with_object([]) do |category_id_and_id, array|
        array << [category_id_and_id[0], 'paid', category_id_and_id[1], collection_name]
        array << [category_id_and_id[0], 'free', category_id_and_id[1], collection_name]
      end
      Sidekiq::Client.push_bulk('class' => MongoListStatWorkers::AppleTopBooksFeed, 'args' => values)
    end
  end
end
-
1
# SAX mapping for an ONIX <contributor> composite (short-tag b036 → author).
class Contributor
  include SAXMachine
  element :b036, :as => :author
end
-
-
1
# SAX mapping for an ONIX <title> composite.
class Title
  include SAXMachine
  element :b203, :as => :main_title
  element :b029, :as => :sub_title
end
-
-
1
# SAX mapping for an ONIX <workidentifier> composite (short-tag b244 → work id).
class WorkIdentifier
  include SAXMachine
  element :b244, :as => :work_id
end
-
-
1
# SAX mapping for an ONIX <salesrights> composite.
class SalesRights
  include SAXMachine
  element :b089, :as => :sales_right_type
  element :b090, :as => :country_codes
  element :b388, :as => :territory_codes
end
-
-
1
# SAX mapping for one ONIX <product> record; only the fields the merge/purge
# feed processors below need are declared.
class Product
  include SAXMachine
  element :a002, :as => :status             # 02/03 = add-or-change, 05 = removal (see feed processors)
  element :a001, :as => :isbn13             # record key, used as the ISBN13 throughout
  element :b394, :as => :publishing_status  # 02 = forthcoming, 04 = active (see feed processors)
  element :b243, :as => :division_code
  element :title, :as => :title, :class => Title
  element :workidentifier, :as => :workidentifier, :class => WorkIdentifier
  elements :contributor, :as => :contributors, :class => Contributor
  elements :salesrights, :as => :salesrights, :class => SalesRights
end
-
-
1
module MergePurge
-
1
# Reconciles a user's tracked list against an ONIX delta feed: parses the
# feed into add/remove ISBN sets plus per-ISBN metadata, creates any unknown
# WarehouseBookVersions, batch-inserts TrackedBookVersions (with metadata
# serialized as YAML) and batch-deletes the removals. RHDE users get their
# own feed processor; all other ONIX users share the RHINC processor.
def self.reconcile_onix_xml(xml, user)
  Rails.logger.tagged('onix') {Rails.logger.info "--- ONIX Merge purge run for ISBNs in #{File.basename(xml)} for #{user.name} ---"}

  # Setup
  isbns_to_add = []
  isbns_to_remove = []
  metadata = {}
  user_existing_book_versions = user.warehouse_book_versions.value_of(:id, :isbn13)
  user_existing_isbns = user_existing_book_versions.collect(&:last).compact #remove all nil isbn13s
  user_existing_book_version_ids = user_existing_book_versions.collect(&:first)

  # The feed processors mutate isbns_to_add / isbns_to_remove / metadata in place.
  if user.email.include? 'rhde@booklr.com'
    MergePurge.process_rhde_onix_feed(xml, isbns_to_add, isbns_to_remove, metadata)
  else
    MergePurge.process_rhinc_onix_feed(xml, isbns_to_add, isbns_to_remove, metadata)
  end

  # Additions
  existing_book_versions_to_add = WarehouseBookVersion.where(isbn13: isbns_to_add, tld: user.tld).value_of(:id, :isbn13)
  existing_book_versions_isbns_to_add = existing_book_versions_to_add.collect(&:last)

  Rails.logger.tagged('onix') {Rails.logger.info "The following ISBNs already exist on their list: #{(existing_book_versions_isbns_to_add + user_existing_isbns).join(', ')}"}

  creation_isbns = isbns_to_add - existing_book_versions_isbns_to_add
  Rails.logger.tagged('onix') {Rails.logger.info "Creating the following new ISBNs (#{creation_isbns.count}): #{creation_isbns.join(', ')}"}

  creation_ids = creation_isbns.collect{|isbn| WarehouseBookVersion.create(isbn13: isbn, source: user.email, tld: user.tld).id}
  Rails.logger.tagged('onix') {Rails.logger.info 'Completed creating new Book Versions'}

  existing_book_version_ids_to_add = existing_book_versions_to_add.collect(&:first)

  book_version_ids_to_add = creation_ids + (existing_book_version_ids_to_add - user_existing_book_version_ids)

  # Removals
  existing_book_versions_to_remove = WarehouseBookVersion.where(isbn13: isbns_to_remove, tld: user.tld).value_of(:id, :isbn13)
  existing_book_versions_isbns_to_remove = existing_book_versions_to_remove.collect(&:last)

  Rails.logger.tagged('onix') {Rails.logger.info "The following ISBNs trying to be removed do not exist in the database: #{(isbns_to_remove - existing_book_versions_isbns_to_remove).join(', ')}"}
  Rails.logger.tagged('onix') {Rails.logger.info "The following ISBNs trying to be removed do not exist on their list: #{(existing_book_versions_isbns_to_remove - user_existing_isbns).join(', ')}"}

  book_version_ids_to_remove = existing_book_versions_to_remove.collect(&:first)

  # Batch insert new tracked book versions if any
  if book_version_ids_to_add.present?
    columns = %w[user_id warehouse_book_version_id metadata created_at updated_at]
    values = []
    timestamp = Time.current

    # Get all the ids we're adding and map them to their corresponding isbn13 so we can use that to pull from the metadata hash
    mapping = WarehouseBookVersion.where(id: book_version_ids_to_add).value_of(:id, :isbn13).each_with_object({}) {|vals, hash| hash[vals.first] = vals.last}

    book_version_ids_to_add.each do |book_version_id|
      values << [user.id, book_version_id, metadata[mapping[book_version_id]].to_yaml, timestamp, timestamp]
    end

    TrackedBookVersion.batch_insert(columns, values)
  end

  Rails.logger.tagged('onix') {Rails.logger.info "Removing the following new ISBNs (#{existing_book_versions_isbns_to_remove.count}): #{existing_book_versions_isbns_to_remove.join(', ')}"}

  # Batch delete all the removals if any
  TrackedBookVersion.delete_all(warehouse_book_version_id: book_version_ids_to_remove, user_id: user.id) if book_version_ids_to_remove.present?

  Rails.logger.tagged('onix') {Rails.logger.info "--- Completed associating new list for #{user.name} ---"}
end
-
-
1
# Parses an RHINC ONIX delta feed, appending additions to +isbns_to_add+ and
# removals to +isbns_to_remove+, and filling metadata[isbn13] with
# ['', title, author, division_code] (the first slot is a legacy asin
# column). All three out-params are mutated in place.
def self.process_rhinc_onix_feed(xml, isbns_to_add, isbns_to_remove, metadata)
  # Use Nokogiri Reader / Sax-Machine to handle parsing huge files that wont fit in memory
  reader = Nokogiri::XML::Reader(xml)
  while reader.read
    if reader.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT and reader.name == 'product'
      product = Product.parse(reader.outer_xml)

      # status of 02 or 03 means an add or change, status of 05 means an outright removal from the system
      if product.status == '02' || product.status == '03'
        # publishing_status of 02 means forthcoming title, 04 means active title so add these
        # any other status means the title is to be removed if it exists on their list currently
        if product.publishing_status == '02' || product.publishing_status == '04'
          isbns_to_add << product.isbn13

          title = product.title.sub_title.present? ? (product.title.main_title + ' - ' + product.title.try(:sub_title)) : product.title.main_title
          author = product.contributors.first.author if product.contributors.first.present?
          metadata[product.isbn13] = ['', title, author, product.division_code]
        else
          isbns_to_remove << product.isbn13
        end
      elsif product.status == '05'
        isbns_to_remove << product.isbn13
      end
    end
  end
end
-
-
1
# Parses an RHDE ONIX delta feed, appending additions to +isbns_to_add+ and
# removals to +isbns_to_remove+, and filling metadata[isbn13] with
# ['', title, author, division_code]. RHDE feeds additionally carry work
# identifiers, which are mirrored into the Mongo work-id collection (diffed
# against the existing mapping so only changed documents are rewritten).
# All out-params are mutated in place.
def self.process_rhde_onix_feed(xml, isbns_to_add, isbns_to_remove, metadata)
  work_id_array = []
  count = 0

  # Use Nokogiri Reader / Sax-Machine to handle parsing huge files that wont fit in memory
  reader = Nokogiri::XML::Reader(xml)
  while reader.read
    if reader.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT and reader.name == 'product'
      product = Product.parse(reader.outer_xml)

      # status of 02 or 03 means an add or change, status of 05 means an outright removal from the system
      if product.status == '02' || product.status == '03'
        # publishing_status of 02 means forthcoming title, 04 means active title so add these
        # any other status means the title is to be removed if it exists on their list currently
        if product.publishing_status == '02' || product.publishing_status == '04'
          count += 1
          # Progress heartbeat every 500 additions. This replaces a bare
          # Kernel#p debug print that had been left in production code.
          Rails.logger.tagged('onix') {Rails.logger.info "--- Processed #{count} RHDE ONIX additions so far ---"} if count % 500 == 0

          isbns_to_add << product.isbn13

          title = product.title.sub_title.present? ? (product.title.main_title + ' - ' + product.title.try(:sub_title)) : product.title.main_title
          author = product.contributors.first.author if product.contributors.first.present?
          metadata[product.isbn13] = ['', title, author, product.division_code]

          # capture work_id into a hash for insertion into mongo
          work_id_array << {_id: product.isbn13, work_id: product.workidentifier.work_id}.with_indifferent_access if product.workidentifier.present?
        else
          isbns_to_remove << product.isbn13
        end
      elsif product.status == '05'
        isbns_to_remove << product.isbn13
      end
    end
  end

  # work_id_collection contains additions for every delta we process, our mongo collection will hold all mappings whether they are removed or not from their list
  if work_id_array.present?
    # this will work when we upgrade mongo/mongo driver?
    #MongoUtilities.work_id_collection.insert(work_id_array, {continue_on_error: true, collect_on_error: true})

    # in the mean time pull the whole collection into memory, diff and then insert the diff
    work_id_mapping = []
    MongoUtilities.work_id_collection.find.each {|x| work_id_mapping << x.with_indifferent_access}
    diff = work_id_array - work_id_mapping
    if diff.present?
      isbns = diff.collect{|x| x['_id']}

      # remove all existing documents from the collection that coincide with the diff (in case a work_id has been updated), then insert
      MongoUtilities.work_id_collection.remove({'_id' => {'$in' => isbns}})
      MongoUtilities.work_id_collection.insert diff
    end
  end
end
-
-
1
# Merge-purge from a CSV whose column 6 holds an ISBN13 and column 7 an ASIN.
# Valid ISBN13s are preferred; otherwise a 9/10-character ASIN is accepted
# (9-character ones get a leading 0 restored). The heavy lifting is delegated
# to process_incoming_data.
def self.reconcile_asin_and_isbn13_csv_no_metadata(csv, user)
  invalid = []
  incoming_isbns = []
  incoming_asins = []
  user_existing_book_versions = user.warehouse_book_versions.value_of(:id, :asin, :isbn13)
  user_existing_isbns = user_existing_book_versions.collect(&:last).compact #remove all nil isbn13s
  user_existing_asins = user_existing_book_versions.collect(&:second).compact #remove all nil asins
  user_existing_book_version_ids = user_existing_book_versions.collect(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Merge purge run for keys in #{File.basename(csv)} for #{user.name} ---"}

  # CSV Processor is split out in case we have to have different processors for different users
  CSV.foreach(csv, :quote_char => '"', :col_sep =>',', :row_sep =>:auto) do |row|
    if row[6].present? || row[7].present?
      isbn13 = row[6].strip if row[6].present?
      asin = row[7].strip if row[7].present?
      if ISBN_Tools.is_valid_isbn13?(isbn13)
        incoming_isbns << isbn13
      elsif asin.present? && (asin.length == 9 || asin.length == 10)
        # prepend 0 to asins that are 9 digits to make it a valid 10 isbn10. excel causes preceding 0s to be stripped.
        incoming_asins << (asin.length == 9 ? '0'+asin : asin)
        invalid << isbn13 if isbn13.present?
      end
    end
  end

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Found the following invalid ISBN13s #{invalid.join(', ')}"}

  # process all the data
  self.process_incoming_data(user, user_existing_isbns, user_existing_asins, incoming_isbns, incoming_asins, user_existing_book_version_ids)
end
-
-
1
# Merge-purge from an in-memory array of [isbn, asin] pairs (sourced from
# Vook Salesforce per the log line). Valid ISBN13s win; otherwise any present
# ASIN is taken as-is. Delegates to process_incoming_data.
def self.reconcile_asin_and_isbn13_array_no_metadata(data, user)
  invalid = []
  incoming_isbns = []
  incoming_asins = []
  user_existing_book_versions = user.warehouse_book_versions.value_of(:id, :asin, :isbn13)
  user_existing_isbns = user_existing_book_versions.collect(&:last).compact #remove all nil isbn13s
  user_existing_asins = user_existing_book_versions.collect(&:second).compact #remove all nil asins
  user_existing_book_version_ids = user_existing_book_versions.collect(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Merge purge run for keys from Vook Salesfroce for #{user.name} ---"}

  data.each do |isbn, asin|
    if isbn.present? || asin.present?
      if ISBN_Tools.is_valid_isbn13?(isbn)
        incoming_isbns << isbn
      elsif asin.present?
        incoming_asins << asin
        invalid << isbn if isbn.present?
      end
    end
  end

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Found the following invalid ISBN13s #{invalid.join(', ')}"}

  # process all the data
  self.process_incoming_data(user, user_existing_isbns, user_existing_asins, incoming_isbns, incoming_asins, user_existing_book_version_ids)
end
-
-
1
# Shared second half of the ASIN/ISBN13 merge-purge flows: diffs the incoming
# keys against the user's current list, creates unknown WarehouseBookVersions,
# batch-inserts new TrackedBookVersions and batch-deletes dropped ones.
def self.process_incoming_data(user, user_existing_isbns, user_existing_asins, incoming_isbns, incoming_asins, user_existing_book_version_ids)

  user_existing_keys = user_existing_isbns + user_existing_asins
  incoming_keys = incoming_isbns + incoming_asins

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Adding: #{(incoming_keys - user_existing_keys).count} out of a total new list size of: #{incoming_keys.count}"}
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Matched: #{(user_existing_keys & incoming_keys).count} from the incoming list with the existing list."}

  # Squeel: `asin >> incoming_asins` is an IN predicate.
  existing_book_versions = WarehouseBookVersion.where{(asin >> incoming_asins) | (isbn13 >> incoming_isbns)}.where(tld: user.tld).value_of(:id, :status, :asin, :isbn13)
  existing_book_versions_isbns = existing_book_versions.collect(&:last)
  existing_book_versions_asins = existing_book_versions.collect(&:third)
  existing_keys = existing_book_versions_isbns + existing_book_versions_asins

  creation_keys = incoming_keys - existing_keys
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Creating the following new keys: #{creation_keys.join(', ')}"}

  # 13-character keys are ISBN13s; anything else is treated as an ASIN.
  creation_ids = creation_keys.collect{|key| (key.length == 13 ? WarehouseBookVersion.create(isbn13: key, source: user.email, tld: user.tld) : WarehouseBookVersion.create(asin: key, source: user.email, tld: user.tld)).id}
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Completed creating new Book Versions #{creation_ids.join(', ')}"}

  existing_book_version_ids = existing_book_versions.collect(&:first)

  book_version_ids_to_add = creation_ids + (existing_book_version_ids - user_existing_book_version_ids)
  book_version_ids_to_remove = user_existing_book_version_ids - existing_book_version_ids

  # Batch insert new tracked book versions if any
  if book_version_ids_to_add.present?
    columns = %w[user_id warehouse_book_version_id created_at updated_at]
    values = []
    timestamp = Time.current

    book_version_ids_to_add.each do |book_version_id|
      values << [user.id, book_version_id, timestamp, timestamp]
    end

    TrackedBookVersion.batch_insert(columns, values)
  end

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Removing book version ids: #{(book_version_ids_to_remove).count} from the user list: #{book_version_ids_to_remove.join(', ')}"}

  # Batch delete all the removals if any
  TrackedBookVersion.delete_all(warehouse_book_version_id: book_version_ids_to_remove, user_id: user.id) if book_version_ids_to_remove.present?

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Completed associating new list for #{user.name} ---"}
end
-
-
1
# Full merge-purge from an RHUK/RHPG CSV: replaces the user's tracked list
# with the CSV contents (creating unknown WarehouseBookVersions), then
# backfills metadata onto tracked rows whose book versions are not yet
# ingested. Each +incoming+ row is [isbn13, *metadata_columns].
def self.reconcile_csv(csv, user)
  incoming = []
  invalid = []
  user_existing_book_versions = user.warehouse_book_versions.value_of(:id, :isbn13)
  user_existing_isbns = user_existing_book_versions.collect(&:last).compact #remove all nil isbn13s
  user_existing_book_version_ids = user_existing_book_versions.collect(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Merge purge run for ISBNs in #{File.basename(csv)} for #{user.name} ---"}

  # CSV Processor is split out in case we have to have different processors for different users
  self.process_rhuk_or_rhpg_csv(csv, incoming, invalid)

  # Sort the incoming list so it matches the sorted exists_uningested list below when zipping
  incoming = incoming.sort_by(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Found the following invalid ISBN13s #{invalid.join(', ')}"}

  incoming_isbns = incoming.collect(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Removing: #{(user_existing_isbns - incoming_isbns).count} book versions and adding: #{(incoming_isbns - user_existing_isbns).count} out of a total new list size of: #{incoming_isbns.count}"}
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Matched: #{(user_existing_isbns & incoming_isbns).count} from the incoming_isbns list with the existing list."}
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Removing the following ISBNs: #{(user_existing_isbns - incoming_isbns).join(', ')}"}

  existing_book_versions = WarehouseBookVersion.where(isbn13: incoming_isbns, tld: user.tld).value_of(:id, :status, :asin, :isbn13)
  existing_book_versions_isbns = existing_book_versions.collect(&:last)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "Associating the following existing ISBNs: #{(existing_book_versions_isbns - user_existing_isbns).join(', ')}"}

  creation_isbns = incoming_isbns - existing_book_versions_isbns
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Creating the following new ISBNs: #{creation_isbns.join(', ')}"}

  creation_ids = creation_isbns.collect{|isbn| WarehouseBookVersion.create(isbn13: isbn, source: user.email, tld: user.tld).id}
  Rails.logger.tagged('merge_purge') {Rails.logger.info "Completed creating new Book Versions #{creation_ids.join(', ')}"}

  existing_book_version_ids = existing_book_versions.collect(&:first)

  book_version_ids_to_add = creation_ids + (existing_book_version_ids - user_existing_book_version_ids)
  book_version_ids_to_remove = user_existing_book_version_ids - existing_book_version_ids

  # Batch insert new tracked book versions if any
  if book_version_ids_to_add.present?
    columns = %w[user_id warehouse_book_version_id created_at updated_at]
    values = []
    timestamp = Time.current

    book_version_ids_to_add.each do |book_version_id|
      values << [user.id, book_version_id, timestamp, timestamp]
    end

    TrackedBookVersion.batch_insert(columns, values)
  end

  # Batch delete all the removals if any
  TrackedBookVersion.delete_all(warehouse_book_version_id: book_version_ids_to_remove, user_id: user.id) if book_version_ids_to_remove.present?

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Completed associating new list for #{user.name} ---"}

  # Only populate metadata for uningested books, this includes the newly created books from above (order by isbn13 so zip matches)
  existing_uningested_book_versions = WarehouseBookVersion.where{status != 'ingested'}.where(isbn13: incoming_isbns, tld: user.tld).order(:isbn13).value_of(:id, :isbn13)
  existing_uningested_book_version_isbns = existing_uningested_book_versions.collect(&:last)
  existing_uningested_book_version_ids = existing_uningested_book_versions.collect(&:first)

  Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Setting metadata for uningested books: #{existing_uningested_book_version_isbns.count} ---"}

  # rows with no metadata will be set to []
  metadata = incoming.select{|row| existing_uningested_book_version_isbns.include?(row.first)}.map {|x| x.drop 1}

  existing_uningested_book_version_ids.zip(metadata).each do |book_version_id, metadata|
    TrackedBookVersion.find_by(user_id: user.id, warehouse_book_version_id: book_version_id).update_attribute :metadata, metadata
  end
  Rails.logger.tagged('merge_purge') {Rails.logger.info '--- Completed setting metadata for uningested books ---'}
end
-
-
1
# Reads an RHUK/RHPG CSV (transcoded windows-1251 → UTF-8): column 0 is the
# ISBN13, column 1 an optional ASIN. Rows with a valid ISBN13 are appended
# whole to +incoming+ (with a stripped leading zero restored on 9-character
# ASINs); bad ISBN13s go to +invalid+. Both arrays are mutated in place.
def self.process_rhuk_or_rhpg_csv(csv, incoming, invalid)
  CSV.foreach(csv, :quote_char => '"', :col_sep =>',', :row_sep =>:auto, encoding: 'windows-1251:utf-8') do |row|
    if row[0].present?
      isbn13 = row[0].strip
      asin = row[1].strip if row[1].present?
      if ISBN_Tools.is_valid_isbn13?(isbn13)
        # prepend 0 to asins that are 9 digits to make it a valid 10 isbn10. excel causes preceding 0s to be stripped.
        incoming << row.map {|value| asin.present? && value == asin && asin.length == 9 ? '0'+ value : value}
      else
        invalid << isbn13
      end
    end
  end
end
-
-
1
# Re-reads a full (non-delta) ONIX file and refreshes the stored metadata on
# the user's TrackedBookVersions, creating tracked entries for titles missing
# from the user's list. Three passes: parse the XML, resolve ISBNs to
# WarehouseBookVersion ids, then write/insert the metadata.
def self.update_metadata(xml, user)
  metadata = []
  count = 0
  invalid = 0
  reader = Nokogiri::XML::Reader(xml)

  # Pass 1: stream-parse the ONIX file, keeping only active/forthcoming titles.
  while reader.read
    if reader.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT and reader.name == 'product'
      product = Product.parse(reader.outer_xml)
      if product.publishing_status == '02' || product.publishing_status == '04'
        title = product.title.sub_title.present? ? (product.title.main_title + ' - ' + product.title.try(:sub_title)) : product.title.main_title
        author = product.contributors.first.author if product.contributors.first.present?
        metadata << {isbn13: product.isbn13, author: author, title: title, division_code: product.division_code}
      else
        invalid +=1
      end

      count +=1
      Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Processed #{count} products from the ONIX full XML found #{invalid} so far ---"} if count % 1000 == 0
    end
  end

  # Pass 2: resolve each parsed row to its WarehouseBookVersion id for this tld.
  count = 0
  metadata.each do |data|
    data[:id] = WarehouseBookVersion.where(isbn13: data[:isbn13], tld: user.tld).value_of(:id).first
    count +=1
    Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Processed #{count} metadata objects and added BookVersion ids ---"} if count % 1000 == 0
  end

  # Pass 3: write the metadata, creating tracked rows that are missing.
  count = 0
  missing = 0
  metadata.each do |metadata_item|
    if metadata_item[:id].present?
      # Original metadata format has asin as the first item so just leaving that blank for now
      metadata_array = ['', metadata_item[:title], metadata_item[:author], metadata_item[:division_code]]
      tracked_book_version = TrackedBookVersion.where(warehouse_book_version_id: metadata_item[:id], user_id: user.id).first
      if tracked_book_version.present?
        tracked_book_version.update_attributes(metadata: metadata_array)
      else
        Rails.logger.tagged('merge_purge') {Rails.logger.info "--- #{metadata_item[:id]} BookVersion doesn't exist for this user, adding it ---"}
        TrackedBookVersion.create(warehouse_book_version_id: metadata_item[:id], user_id: user.id, metadata: metadata_array)
        missing += 1
      end
    end

    count +=1
    Rails.logger.tagged('merge_purge') {Rails.logger.info "--- Updated: #{count} @ #{Time.current} metadata fields, found #{missing} missing titles from the user list ---"} if count % 1000 == 0
  end
end
-
end
-
1
# Helpers for naming and accessing the MongoDB collections used by the
# scraping pipeline. All access goes through the global $mongodb connection.
module MongoUtilities
  PLP_COLLECTION_NAME = 'print_list_prices'.freeze
  ASIN_LIST_COLLECTION_NAME = 'all_asins'.freeze
  WORK_ID_COLLECTION_NAME = 'isbn13_to_work_id'.freeze
  # Collection families that get one fresh collection per day
  # (see daily_collection_name).
  DAILY_COLLECTION_TYPES = %w[stats amazon_list_stats bn_list_stats apple_list_stats book_version_exceptions
                              author_page_data de_competitive_format_data promotion_pages].freeze

  extend self

  # Builds the per-day collection name, e.g. (:stats, 2020-01-02) => "stats_20200102".
  # Raises ArgumentError for collection types outside DAILY_COLLECTION_TYPES.
  def daily_collection_name(collection_type, date = Date.current)
    raise ArgumentError, "invalid collection type: #{collection_type}" unless DAILY_COLLECTION_TYPES.include? collection_type.to_s

    # interpolation calls to_s implicitly, so the redundant .to_s was dropped
    "#{collection_type}_#{date.to_s.gsub('-', '')}"
  end

  # The per-day collection object for the given type/date.
  def daily_collection(collection_type, date = Date.current)
    $mongodb.collection daily_collection_name(collection_type, date)
  end

  def work_id_collection
    $mongodb.collection(MongoUtilities::WORK_ID_COLLECTION_NAME)
  end

  # Runs Mongo's repairDatabase admin command.
  def repair_database
    $mongodb.command repairDatabase: 1
  end

  def scheduler_collection
    $mongodb.collection('scheduler_state')
  end

  # Upserts exception flags onto a book version's document, but only when at
  # least one flag is truthy (avoids writing all-false documents).
  def add_exception_to_collection(collection, warehouse_book_version_id, exceptions)
    collection.update({_id: warehouse_book_version_id}, {'$set' => exceptions}, upsert: true) if exceptions.values.any?
  end

  def all_asin_document(asin, tld)
    {asin: asin, tld: tld}
  end

  def exists_in_all_asin_list?(asin, tld)
    $mongodb.collection(MongoUtilities::ASIN_LIST_COLLECTION_NAME).find(all_asin_document(asin, tld)).limit(1).first.present?
  end

  # Bulk-inserts documents into the asin list; duplicate-key errors (code
  # 11000) are expected and swallowed, anything else is re-raised.
  def add_documents_to_all_asin_list(all_asin_documents)
    $mongodb.collection(MongoUtilities::ASIN_LIST_COLLECTION_NAME).insert(all_asin_documents, continue_on_error: true)
  rescue Mongo::OperationFailure => e
    raise e unless e.error_code == 11000 # Ignore duplicate key errors
  end

  # Ensures a counts document exists for the given date (idempotent upsert).
  def initialize_daily_scrape_field_counts(date_string)
    MongoUtilities.daily_scrape_field_counts_collection.update({date: date_string}, {date: date_string}, {upsert: true})
  end

  def daily_scrape_field_counts_collection
    $mongodb.collection 'daily_scrape_field_counts'
  end
end
-
1
# Maps logical database names to connection settings. The URI for each
# database is read once, at load time, from config/postgres_databases.yml.
module PostgresConfiguration
  @@hostname = YAML.load(File.read('./config/postgres_databases.yml')).with_indifferent_access

  # Builds an ActiveRecord-style configuration hash for the named database.
  def self.configuration_hash_for(db_name)
    parsed = URI.parse(@@hostname[db_name.upcase])

    {host: parsed.host,
     database: parsed.path[1..-1],
     username: parsed.user,
     password: parsed.password,
     port: parsed.port,
     adapter: 'postgresql',
     encoding: 'utf8',
     min_messages: 'WARNING'}
  end

  # Points the given ActiveRecord model class at a different database.
  def self.change_database_connection(activerecord_model, db_name)
    activerecord_model.establish_connection configuration_hash_for(db_name)
  end

  # Opens a raw PG connection for the named database.
  def self.get_raw_connection_for_db(db_name)
    parsed = URI.parse(@@hostname[db_name.upcase])

    get_raw_connection parsed.host, parsed.port, parsed.path[1..-1], parsed.user, parsed.password
  end

  # Thin wrapper around PG::Connection.connect (positional form).
  def self.get_raw_connection(host, port, dbname, user, password)
    PG::Connection.connect host, port, nil, nil, dbname, user, password
  end
end
-
1
# Low-level helpers for Postgres sequence repair and COPY streaming.
module PostgresUtilities
  # Resets the serial sequence behind table_name.column_name to the current
  # max value and returns the next id the sequence will hand out.
  def self.fix_column_sequence(connection, table_name, column_name)
    max_value = connection.execute("select max(#{column_name}) from #{table_name}").first['max']
    # setval(..., 1, false) makes an empty table's sequence hand out 1 first
    new_val = connection.execute("select setval(pg_get_serial_sequence('#{table_name}', '#{column_name}'), #{max_value.nil? ? '1, false' : max_value})").first['setval']

    max_value.nil? ? new_val.to_i : new_val.to_i + 1
  end

  # Terminates a COPY FROM STDIN (optionally aborting with errmsg), drains
  # all results and raises the server's error message unless the command
  # completed OK.
  def self.finalize_copy_command(connection, errmsg)
    errmsg ? connection.put_copy_end(errmsg) : connection.put_copy_end

    command_ok = false
    error_message = nil
    while res = connection.get_result
      command_ok = true if res.result_status == PG::PGRES_COMMAND_OK
      error_message = res.error_message
    end

    raise error_message unless command_ok
  end

  # Opens a second raw connection with the same credentials as the model's
  # current connection (useful for running a concurrent COPY stream).
  def self.get_new_connection(model)
    PostgresConfiguration.get_raw_connection model.connection.raw_connection.host,
                                             model.connection.raw_connection.port,
                                             model.connection.raw_connection.db,
                                             model.connection.raw_connection.user,
                                             model.connection.raw_connection.pass
  end

  # Streams the result of `sql` to `filename` as CSV via COPY TO STDOUT.
  # Uses the supplied raw connection or falls back to ActiveRecord's.
  def self.copy_to_csv(sql, filename, raw_connection = nil)
    dirname = File.dirname filename
    begin
      # File.exist? — File.exists? is deprecated
      Dir.mkdir dirname unless File.exist? dirname
      csv = File.open filename, 'wb'
    rescue Errno::EEXIST
      # open the file as normal if the dir already exists. This means another process on the same dyno
      # created it already for this user
      csv = File.open filename, 'wb'
    end

    conn = raw_connection || ActiveRecord::Base.connection.raw_connection
    conn.exec("COPY (#{sql}) TO STDOUT CSV")
    # BUG FIX: this loop previously read from `raw_connection`, which is nil
    # whenever the caller relied on the default connection — always use conn.
    while !(data = conn.get_copy_data).nil?
      csv << data
    end

    csv.flush
  ensure
    # Close the file even if the COPY stream fails mid-way.
    csv.close if csv && !csv.closed?
  end
end
-
# IMPORTANT NOTE: the variables set in TCPSocket are global and affect all threads on a sidekiq instance.
# This allows us to share a single port for all threads on an instance which lets us perfectly distribute load across all tor clients
module ProxyUtilities
  SITE_KEYS = [:amazon, :barnes_and_noble].freeze
  # Per-process throttle cache, defaulting every site to "not throttled".
  $throttled_sites = Hash.new(false)

  # Routes all TCP traffic through the tor proxy, picking a port from the
  # redis pool the first time it is called on this process.
  def self.force_proxy
    TCPSocket::socks_server = 'tor.vook.com'
    # get port list from redis list unless its already set for the instance
    TCPSocket::socks_port = RedisUtilities.get_available_tor_port unless TCPSocket::socks_port.present?
  end

  def self.using_proxy?
    TCPSocket::socks_server.present?
  end

  def self.port
    TCPSocket::socks_port
  end

  # check if dyno is throttled, if it is, and we haven't already set the proxy, set the proxy and scrape, otherwise scrape from instance IP
  def self.proxy_setup(site_key)
    # BUG FIX: was `raise InvalidArgument` — an undefined constant that would
    # have surfaced as NameError at runtime; ArgumentError is the intended class.
    raise ArgumentError, "unknown site key: #{site_key}" unless SITE_KEYS.include? site_key
    sync_throttled_state site_key

    if $throttled_sites[site_key]
      # Same logic as force_proxy — reuse it instead of duplicating it here.
      force_proxy
    else
      TCPSocket::socks_server = nil
      TCPSocket::socks_port = nil
    end
  end

  private

  # Keeps the in-process throttle cache in sync with the redis-side flag,
  # logging whenever the two disagree.
  def self.sync_throttled_state(site_key)
    if RedisUtilities.is_dyno_throttled? != $throttled_sites[site_key]
      p "#{site_key} mismatch between redis and instance throttled state, toggling state on dyno: #{Utilities.dyno_id}"
      $throttled_sites[site_key] = !$throttled_sites[site_key]
    end
  end
end
-
1
# Shared helpers around the global $redis connection: the tor port pool,
# dyno throttle flags, sidekiq worker-set maintenance, scrape counters and
# a small number of whitelisted named sets.
module RedisUtilities
  extend self

  # Whitelist of counter keys; maps each symbol to its redis key string.
  COUNT_KEYS = %i[amazon_statable_scrape_job_count bn_statable_scrape_job_count itunes_statable_scrape_job_count
                  goodreads_statable_scrape_job_count amazon_author_page_scrape_job_count].each_with_object({}.with_indifferent_access) {|key, hash| hash[key] = key.to_s}.freeze
  # Sets whose members are stored as JSON and must be (de)serialized on access.
  JSON_SET_KEYS = {scraped_categories: 'top-100-categories'}.with_indifferent_access.freeze
  SET_KEYS = JSON_SET_KEYS
  AVAILABLE_PORT_LIST = 'portlist'.freeze
  BLOCK_SIZE = 10000

  # populate a redis list with a list of all the ports available (multiplied by the thread count) for connections
  # eg. 10 clients with 10 ports each = 100 ports total and 100 workers with 5 threads a piece = 100 ports listed 5 times each
  # make sure that you actually have enough tor clients up and running to support the worker count
  def populate_port_list(worker_count)
    # Clear every dyno throttle flag and the old port list before repopulating.
    $redis.keys('*-throttled').each {|x| $redis.del(x)}
    $redis.del AVAILABLE_PORT_LIST

    # Ports are sequential starting at tor's default SOCKS port.
    start_port = 9050
    worker_count.times do |client_num|
      $redis.lpush(AVAILABLE_PORT_LIST, start_port+client_num)
    end
  end

  # Rotates the port list (tail popped and pushed back on) so successive
  # callers cycle through all available tor ports.
  def get_available_tor_port
    $redis.rpoplpush(AVAILABLE_PORT_LIST, AVAILABLE_PORT_LIST)
  end

  # Marks this dyno as throttled for the next hour (key expires via SETEX).
  def set_dyno_throttled
    dyno_id = Utilities.dyno_id
    $redis.setex("#{dyno_id}-throttled", 3600, 1)
  end

  # Whether this dyno's throttle key currently exists.
  def is_dyno_throttled?
    dyno_id = Utilities.dyno_id
    $redis.exists("#{dyno_id}-throttled")
  end

  # Removes sidekiq workers that are phantoms (no backing worker message) or
  # have been running a job on `queue` longer than max_age seconds.
  # Returns the number of workers examined.
  def clear_sleeping_and_phantom_workers(queue, max_age)
    workers = get_approximate_worker_list

    # Fetch every worker's message in one pipelined round-trip; results come
    # back in the same order as `workers`.
    worker_msgs = $redis.pipelined do
      workers.each do |worker_id|
        $redis.get("sidekiq:worker:#{worker_id}")
      end
    end

    # attach to each other
    worker_ids_and_msgs = workers.zip(worker_msgs)

    # delete all blank workers if any
    $redis.del worker_ids_and_msgs.select{|worker_id, msg| worker_id if msg.blank?}.collect{|worker_id, msg| "sidekiq:worker:#{worker_id}"} if worker_ids_and_msgs.select{|worker_id, msg| worker_id if msg.blank?}.present?

    # clear phantoms or sleepers
    worker_ids_and_msgs.each do |worker_id, msg|
      $redis.srem('sidekiq:workers', worker_id) if msg.blank?
      if msg.present? && queue.include?(Sidekiq.load_json(msg)['queue'])
        run_at = Sidekiq.load_json(msg)['run_at']
        if Time.current.to_i - run_at > max_age
          $redis.srem('sidekiq:workers', worker_id)
        end
      end
    end
    worker_ids_and_msgs.count
  end

  # Redis.scan the workers set to get an approximate list of all workers
  def get_approximate_worker_list
    cursor = 0
    all_workers = []
    # SSCAN returns the next cursor as a string; '0' signals the scan is done.
    while cursor != '0'
      cursor, workers = $redis.sscan('sidekiq:workers', cursor.to_i)
      all_workers += workers
    end
    all_workers
  end

  # Careful when using this, its VERY slow if the set is large (larger than 10k)
  def worker_count(queue)
    workers = $redis.smembers('sidekiq:workers')
    worker_msgs = $redis.pipelined do
      workers.each do |worker_id|
        $redis.get("sidekiq:worker:#{worker_id}")
      end
    end

    # Count messages whose queue name contains `queue`.
    worker_msgs.compact.sum do |msg|
      Sidekiq.load_json(msg)['queue'].include?(queue) ? 1 : 0
    end
  end

  # Symbol counter key for a scrape scope, e.g. :amazon_statable =>
  # :amazon_statable_scrape_job_count.
  def get_scrape_count_key(scope)
    :"#{scope}_scrape_job_count"
  end

  # Stores a counter; key must be one of COUNT_KEYS (else ArgumentError).
  def set_count(key, number)
    assert_valid_key COUNT_KEYS, key

    $redis.set COUNT_KEYS[key], number
  end

  # Reads a counter as an integer (0 when unset).
  def get_count(key)
    assert_valid_key COUNT_KEYS, key

    $redis.get(COUNT_KEYS[key]).to_i
  end

  # Adds values to a named set, JSON-encoding each value when the set is
  # JSON-backed (see JSON_SET_KEYS).
  def add_to_set(key, values)
    assert_valid_key SET_KEYS, key

    transformed_values = JSON_SET_KEYS.keys.include?(key.to_s) ? values.collect(&:to_json) : values
    $redis.sadd SET_KEYS[key], transformed_values
  end

  # Returns a set's members, JSON-decoding them for JSON-backed sets.
  def get_set_members(key)
    assert_valid_key SET_KEYS, key

    members = $redis.smembers SET_KEYS[key]
    JSON_SET_KEYS.keys.include?(key.to_s) ? members.collect {|member| JSON.parse(member)} : members
  end

  # Deletes the whole set.
  def clear_set(key)
    assert_valid_key SET_KEYS, key

    $redis.del SET_KEYS[key]
  end

  private

  # Raises ArgumentError unless `key` appears in the whitelist hash.
  def assert_valid_key(key_hash, key)
    raise ArgumentError.new("Invalid redis key, valid keys are #{key_hash.keys}") unless key_hash.stringify_keys.keys.include? key.to_s
  end
end
-
1
# Streams one day's WarehouseStat rows (for one region) out of Postgres via
# COPY TO STDOUT and fans every CSV row out to the supplied report cards.
module ReportGenerator
  extend self
  extend EnterpriseReports

  # Generates all reports for date/tld. Each report card receives every row
  # (via output_row!) and decides itself whether to include it.
  def run(date, tld, report_cards: [])
    warehouse_region_id = WarehouseRegion.find_by(tld: tld).id
    warehouse_date_id = WarehouseDate.find_by(date: date).id

    # One wide row per stat: every WarehouseStat column plus selected columns
    # from the joined book-version/date/category tables (join_select appears
    # to be a project-local relation extension — see its definition).
    relation = WarehouseStat.select(WarehouseStat.column_names.collect {|column_name| WarehouseStat.arel_table[column_name]}).
        joins(:warehouse_book_version, :warehouse_date).
        outer_joins(:warehouse_amazon_category1, :warehouse_amazon_category2, :warehouse_amazon_category3,
                    :sub_category1_author_rank_id, :sub_category2_author_rank_id, :sub_category3_author_rank_id, :sub_category4_author_rank_id).
        join_select('inner', false,
                    warehouse_book_version: %w[asin isbn13 bn_id status book_format pub_date title publisher sold_by pages physical_details author_name author_asin],
                    warehouse_date: %w[date]).
        join_select('outer', false,
                    warehouse_amazon_category1: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    warehouse_amazon_category2: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    warehouse_amazon_category3: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    sub_category1_author_rank_id: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    sub_category2_author_rank_id: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    sub_category3_author_rank_id: %w[category_id name category_type depth status tx_book_category_id tld parent_id],
                    sub_category4_author_rank_id: %w[category_id name category_type depth status tx_book_category_id tld parent_id]).
        where(warehouse_region_id: warehouse_region_id, warehouse_date_id: warehouse_date_id).
        order(:warehouse_book_version_id, :created_at)
    # Column labels in SELECT order, e.g. "warehouse_book_versions_asin";
    # aliased attributes respond to #left, plain arel attributes do not.
    row_keys = relation.select_values.collect do |attribute|
      attr = (attribute.respond_to?(:left) ? attribute.left : attribute)

      "#{attr.relation.name}_#{attr.name}"
    end
    # Indexes of *_price columns — reformatted from cents in preprocess_data.
    price_column_indexes = row_keys.each_with_index.collect {|row_key, index| index if row_key.include?('_price')}.compact

    # Prime each report card: header row, zeroed counters, open CSV handle.
    report_cards.each do |report_card|
      report_card.report_date = date
      report_card.generate_row_key_indexes row_keys
      report_card.open_csv
      report_card.set_row_count 0
      report_card.set_time_sent nil
      report_card.insert_header!
    end

    total_row_count = 0
    report_row_counts = report_cards.each_with_object({}) {|report_card, hash| hash[report_card.report_key] = 0}
    update_row_counts report_row_counts, report_generator_row_count: total_row_count, report_generator_completion_time: 'Not Sent Yet'
    Rails.logger.tagged('enterprise') {Rails.logger.info "----- ReportGenerator starting COPY -----"}

    # Stream rows out via COPY rather than instantiating AR objects.
    raw_connection = WarehouseStat.connection.raw_connection
    raw_connection.exec("COPY (#{relation.to_sql}) TO STDOUT CSV")
    while !(data = raw_connection.get_copy_data).nil?
      total_row_count += 1
      row = preprocess_data price_column_indexes, data
      report_cards.each {|report_card| report_row_counts[report_card.report_key] += 1 if report_card.output_row!(row_keys, row)}
      # Progress heartbeat into redis every 10k rows.
      update_row_counts report_row_counts, report_generator_row_count: total_row_count if total_row_count % 10000 == 0
    end

    Rails.logger.tagged('enterprise') {Rails.logger.info "----- ReportGenerator Finished: #{total_row_count} rows -----"}
    update_row_counts report_row_counts, report_generator_row_count: total_row_count, report_generator_completion_time: Time.current.to_s

    # Finalize and upload everything before any report is delivered.
    report_cards.each do |report_card|
      report_card.finalize_output!
      report_card.move_to_s3
    end

    report_cards.each do |report_card|
      report_card.deliver_report
    end

    ReportCards::ReportCard.send_rhinc_ftp_completion if report_cards.any? {|report_card| report_card.send_all_complete?}
  end

  private

  # Writes per-report row counts (plus any extra named stats) into the
  # 'daily_report_stats' redis hash as flat key/value pairs.
  def update_row_counts(report_row_counts, **extra_keys)
    keys_and_values = report_row_counts.collect {|key, count| ["#{key}_row_count", count]}.flatten + extra_keys.collect {|key_and_count| key_and_count}.flatten
    $redis.hmset 'daily_report_stats', *keys_and_values
  end

  # Parses one COPY CSV line and reformats every price column via as_price.
  def preprocess_data(price_column_indexes, data)
    row = parse_data data

    price_column_indexes.each do |index|
      row[index] = ReportUtilities.as_price(row[index])
    end

    row
  end

  # Splits a CSV line into fields, honoring quoted fields with embedded
  # commas / doubled quotes. Hand-rolled regex instead of CSV.parse —
  # presumably for speed on the hot COPY loop; confirm before replacing.
  def parse_data(data)
    ",#{data}".scan /(?<=,)(?:"(?:""|[^"])*")(?=,|\n)|(?<=,)(?:[^,]*?)(?=,|\n)/
  end
end
-
1
# Formatting and transport helpers shared by the report pipeline.
module ReportUtilities
  extend self

  # Formats an integer-cents value as a 2-decimal string ("123" => "1.23").
  # Blank values are passed through untouched.
  def as_price(value)
    return value unless value.present?

    '%.2f' % (value.to_i / 100.0)
  end

  # Downloads a (public) S3 link into tmp and ships it to the rhinc FTP client.
  def ftp_from_s3(s3_public_link)
    # Note file has to be public before this can run correctly
    file = File.new("#{Rails.root}/tmp/#{File.basename(s3_public_link)}", 'wb')
    # NOTE(review): Kernel#open on a URL depends on open-uri being loaded;
    # prefer URI.parse(...).open to avoid Kernel#open's pipe/command footgun
    # if the link could ever be attacker-controlled.
    file << open(s3_public_link).read
    file.flush

    EnterpriseReports.ftp_to_client(:rhinc, file)
  ensure
    # BUG FIX: the handle was previously leaked; close it once ftp_to_client
    # has returned (assumes the transfer uses the handle synchronously).
    file.close if file && !file.closed?
  end
end
-
1
# String/URL parsing helpers shared by the Amazon and B&N scrapers.
module ScraperUtilities
  # XPath predicate matching text nodes that contain a currency marker.
  CURRENCY_CONDITION = "(contains(text(), '$') or contains(text(), '£') or contains(text(), 'EUR'))".freeze

  # Pulls the category id segment out of /digital-text/<id> or /books/<id>
  # URLs; nil when neither pattern matches.
  def self.get_category_id_from_url(url)
    /\/digital-text\/([^\/]+)/.match(url).try(:[], 1) || /\/books\/([^\/]+)/.match(url).try(:[], 1)
  end

  # Extracts the ASIN following a /dp/, /product-reviews/, /e/ or
  # /gp/product/ segment of an Amazon URL; nil when no segment matches.
  def self.extract_asin_from_url(url)
    url.scan(/.*\/(?:dp|product-reviews|e|gp\/product)\/(?:(?:(.*?)\/)|(.*$))/).flatten.compact.first
  end

  # First URL whose embedded ASIN is in `asins` (nil when none or urls blank).
  def self.match_url_with_asins(urls, asins)
    urls.find {|url| asins.include? extract_asin_from_url(url)} if urls.present?
  end

  # Normalizes a scraped price string to a digits-only cents string.
  # NOTE(review): the `<<` branches mutate the caller's string in place —
  # confirm no caller relies on the original text afterwards.
  def self.cleanse_price(text)
    # Some prices are displayed as ranges, we can set those to nil safely
    return nil if text.include? '-'

    # EUR prices use a comma as the decimal separator.
    splitchar = text.include?('EUR') ? ',' : '.'

    if text.include?('FREE')
      text = '000'
    elsif text.split(splitchar).last.present? && text.split(splitchar).last.length < 2
      # pad a lone trailing decimal digit, e.g. "1.5" -> "1.50"
      text << '0'
    elsif text.exclude?(splitchar)
      # whole number with no decimal part, e.g. "12" -> "1200" cents
      text << '00'
    end

    # Remove all non numerical characters and remove whitespace
    text.gsub(/\D/, '')
  end

  def self.cleanse_string(text)
    # accepts only characters with valid encodings and strips out UTF8 newlines (subbed to a space) and invisible control characters (subbed to nothing)
    text.chars.select{|i| i.valid_encoding?}.join.gsub(/\p{Co}|\p{Cs}|\p{Cn}/, '').gsub(/\p{Zl}|\p{Zp}|\n/, ' ')
  end

  # Extracts the EAN from a B&N URL — either the ean= query parameter or the
  # last path segment when no parameter is present.
  def self.parse_ean_from_bn_url(url)
    # Maybe upgrade this later to take an entire a DOM element and decide between data-bn-rel or href like find_ean_by_first_result does?
    url.include?('ean=') ? url.partition('&').first.partition('ean=').third : url.partition('?').first.split('/').last

    # If the ean parameter ever moves and isn't the first param in the query string, switch to this regexp and it should be fixed
    # /\?.*ean=(.*?)&|$/.match(url)[1]
  end

  # Author-rank category names rooted at "Kindle eBooks" get the
  # "Kindle Store > " prefix prepended so they align with product categories.
  def self.convert_author_rank_category_name(category_name)
    category_name.present? && category_name.starts_with?('Kindle eBooks') ? "Kindle Store > #{category_name}" : category_name
  end

  # Lenient date parse: nil for blank or unparseable input.
  def self.parse_date_string(text)
    Date.parse(text) if text.present?
  rescue ArgumentError
    nil
  end

  # First segment of an "A > B > C" category path, whitespace-squished.
  def self.base_category_from_category_name(category_name)
    category_name.scan(/[^>]+/).first.squish
  end

  # Maps scraped Amazon format labels to canonical format names.
  def self.coerce_amazon_format(format)
    # Some stupid pages have ", .exe" and ", .doc", and other weird things in the related format book formats. Stupid.
    format = format.gsub(', .exe', '').gsub(', .doc', '').gsub(', 3.5 inch diskette', '').gsub(', .wks', '').gsub(', .xml', '')

    if format.include?('Mass Market')
      'MassMarketPaperback'
    elsif format.include?('Kindle')
      'Kindle Edition'
    else
      format.camelize
    end
  end

  # True when the B&N search page is blank or shows the no-results message.
  def self.bn_no_results?(page)
    page.blank? || (page.css('div.search-noresults-message').present? && page.css('div.search-noresults-message').text.strip.include?('Sorry, we could not find what you were looking for.'))
  end

  # Resolves a possibly-relative URL against base_url.
  # NOTE(review): URI.encode is deprecated and removed in Ruby 3.0 — revisit
  # before any Ruby upgrade.
  def self.force_absolute_url(url, base_url)
    URI.parse(URI.encode(base_url)).merge(URI.encode(url)).to_s
  end
end
-
1
# Translates scraped page objects into flat stats hashes for ingestion.
module StatsScraper
  extend self

  # All stats scrapeable from an Amazon product page.
  def get_amazon_product_page_stats(page)
    stats = {likes: page.scrape_likes,
             amazon_price: page.scrape_amazon_price,
             digital_list_price: page.scrape_digital_list_price,
             amazon_list_price: page.scrape_amazon_list_price,
             amazon_sales_rank: page.scrape_sales_rank,
             amazon_sales_rank_category: page.scrape_sales_rank_category,
             also_bought: page.scrape_also_boughts,
             bought_after_viewing: page.scrape_bought_after_viewing,
             frequently_bought_together: page.scrape_frequently_bought_together,
             similar_items_by_category: page.scrape_similar_items_by_category,
             similar_items_by_category_external_id: page.scrape_similar_items_by_category_id,
             amazon_average_rating: page.scrape_amazon_average_rating,
             amazon_review_count: page.scrape_amazon_review_count,
             author_ranks: page.scrape_author_ranks,
             amazon_availability: page.scrape_availability,
             kindle_unlimited: page.scrape_kindle_unlimited}

    stats.merge! subcategory_stats(page)

    # Hoisted into locals so each scrape_* runs once per stat (previously the
    # guard and the merge each re-invoked the scrape; assumes the scrape is
    # deterministic over the fixed DOM — confirm if scrapes are not memoized).
    related_format_data = page.scrape_related_format_data
    stats.merge! related_format_data if related_format_data.present?
    star_rating_distribution = page.scrape_star_rating_distribution
    stats.merge! star_rating_distribution if star_rating_distribution.present?

    stats
  end

  # "Also bought items by" author names, keyed by 1-based position.
  # Returns {} when the author page has none.
  def get_amazon_author_page_stats(author_page)
    also_bought_bys = author_page.scrape_also_bought_items_by
    if also_bought_bys.present?
      also_bought_bys = also_bought_bys.each_with_index.each_with_object({}) do |author_and_index, hash|
        author, index = author_and_index
        hash[:"amazon_also_bought_items_by_#{index + 1}"] = author
      end
    end

    also_bought_bys || {}
  end

  # TODO refactor this, scrape_cheapest_print_list_price needs to change and then this will too so we can handle captchas
  # Lowest print list price; only looked up for big-six-published Kindle titles.
  def get_amazon_lowest_print_list_price(page, book_format)
    big_six_publishers = ['Hachette Book Group', 'HarperCollins Publishers', 'HarperCollins Publishing', 'Macmillan',
                          'Penguin Publishing', 'Random House Digital, Inc.', 'Random House Mondadori',
                          'Simon and Schuster Digital Sales Inc']
    sold_by = page.scrape_sold_by
    if sold_by.present? && big_six_publishers.include?(sold_by) && book_format.present? && book_format.include?('Kindle')
      lowest = page.scrape_cheapest_print_list_price
      lowest[:print_list_price] if lowest.present?
    end
  end

  # Competitive-intelligence subset of the Amazon product page stats.
  def get_amazon_competitive_stats(page)
    stats = {amazon_price: page.scrape_amazon_price,
             digital_list_price: page.scrape_digital_list_price,
             amazon_list_price: page.scrape_amazon_list_price,
             amazon_sales_rank: page.scrape_sales_rank,
             publisher: page.scrape_publisher,
             pub_date: page.scrape_pub_date.to_s,
             physical_details: page.physical_details,
             language: page.scrape_language,
             isbn13: page.scrape_isbn_13,
             title: page.scrape_title,
             author: page.scrape_author
    }

    stats.merge! subcategory_stats(page)

    stats
  end

  # All stats scrapeable from a B&N product page for one EAN.
  def get_stats_for_ean(page)
    stats = {bn_price: page.scrape_price,
             bn_nook_price: page.scrape_nook_price,
             bn_nook_list_price: page.scrape_nook_list_price,
             bn_list_price: page.scrape_list_price,
             barnes_sales_rank: page.scrape_sales_rank,
             bn_also_bought: page.scrape_also_boughts,
             barnes_average_rating: page.scrape_average_rating,
             barnes_rating_count: page.scrape_rating_count,
             barnes_review_count: page.scrape_review_count}

    stats.merge!(page.scrape_related_format_data || {})
  end

  # US marketplace uses the plain iTunes lookup; others use the regional one.
  def get_stats_for_itunes(itunes_id, tld)
    tld == '.com' ? ItunesApi.get_itunes_data(itunes_id) : ItunesApi.get_regional_itunes_data(itunes_id)
  end

  # Goodreads rating details (US only; other marketplaces return {}).
  def get_stats_for_goodreads(goodreads_url, key, tld)
    if tld == '.com'
      goodreads_page = goodreads_url.present? ? GoodreadsBookPage.new(goodreads_url) : GoodreadsBookPage.by_key(key)
      return {} unless goodreads_page.ok?

      goodreads_page.rating_details || {}
    else
      {}
    end
  end

  private

  # Shared extraction of sub_categoryN_rank/_tree/_id triples; this logic was
  # previously duplicated in get_amazon_product_page_stats and
  # get_amazon_competitive_stats.
  def subcategory_stats(page)
    subs = page.scrape_sub_categories_and_ranks
    return {} unless subs.present?

    subs.each_with_index.each_with_object({}) do |(sub_category_and_rank, i), stats|
      next unless sub_category_and_rank.present?

      stats[:"sub_category#{i + 1}_rank"] = sub_category_and_rank[:rank]
      stats[:"sub_category#{i + 1}_tree"] = sub_category_and_rank[:category]
      stats[:"sub_category#{i + 1}_id"] = sub_category_and_rank[:category_id]
    end
  end
end
-
1
# Grab-bag of cross-cutting helpers: environment detection, Postgres COPY
# escaping, redis-backed feature flags, sidekiq introspection and book-key
# type detection.
module Utilities
  # Amazon marketplaces this system supports.
  TLDS = %w[.com .co.uk].freeze

  # Rails.env when Rails is loaded, else RAILS_ENV, else 'development'.
  def self.env
    (defined?(Rails).present? ? Rails.env : ENV['RAILS_ENV']) || 'development'
  end

  # Yields (column_values, data, index) for each datum so the caller can fill
  # the slots; returns the first max_size slots (nil-padded when sparse).
  def self.pad_customer_behavior_data(customer_behavior_data, max_size)
    column_values = Array.new(max_size)
    if customer_behavior_data.present?
      customer_behavior_data.each_with_index do |data, index|
        yield(column_values, data, index)
      end
    end

    column_values.first(max_size)
  end

  # Escapes a string for Postgres COPY CSV: strips invalid/invisible chars,
  # escapes backslashes and quotes, truncates to 255 chars, wraps in quotes.
  # Non-strings are returned untouched.
  def self.prepare_string_for_copy(string)
    return string unless string.is_a? String
    begin
      truncated_string = string.gsub(/\p{Co}|\p{Cs}|\p{Cn}/, '').gsub("\\", "\\\\\\").gsub(/\\xEC/i, "\\\\\\xEC").gsub(/\\xEE/i, "\\\\\\xEE").gsub("\"", "\\\"").gsub(/\p{Zl}|\p{Zp}|\n/, ' ')[0..254]
    rescue ArgumentError
      # invalid byte sequences: round-trip through UTF-16 to drop them first
      truncated_string = string.encode('UTF-16', 'UTF-8', invalid: :replace, replace: '').encode('UTF-8', 'UTF-16').gsub(/\p{Co}|\p{Cs}|\p{Cn}/, '').gsub("\\", "\\\\\\").gsub(/\\xEC/i, "\\\\\\xEC").gsub(/\\xEE/i, "\\\\\\xEE").gsub("\"", "\\\"").gsub("\n", "\\n")[0..254]
    end

    # A trailing backslash would escape the closing quote — trim it off.
    while truncated_string.end_with?("\\")
      truncated_string.chop!
    end

    "\"#{truncated_string}\""
  end

  # Parses a possibly formatted integer like "(1,234)" for COPY; blanks
  # become nil, real Integers pass through.
  def self.prepare_integer_for_copy(integer)
    return integer.presence if integer.blank? || integer.is_a?(Integer)

    integer.gsub(/\(|\)|,/, '').to_i
  end

  # Float twin of prepare_integer_for_copy.
  def self.prepare_float_for_copy(float)
    return float.presence if float.blank? || float.is_a?(Float)

    float.gsub(/\(|\)|,/, '').to_f
  end

  # Nils out scraped price strings that are ranges, dates, placeholder text
  # or would overflow a 32-bit integer column.
  def self.ignore_bad_price_for_copy(string)
    string.present? && !(string.include?('-') || string.include?('/') || string.downcase.include?('click to see') || string.to_i > 2147483647) ? string : nil
  end

  # Whitelist guard for the redis feature flags below.
  def self.check_flag_name(flag_name)
    raise 'Invalid Flag' unless %w[historic_etl ensure_one_historic_etl ingestions conditional_log].include? flag_name
  end

  def self.set_flag(flag_name)
    check_flag_name flag_name
    $redis.set "utilities:flags:#{flag_name}", 1
  end

  def self.unset_flag(flag_name)
    check_flag_name flag_name
    $redis.set "utilities:flags:#{flag_name}", 0
  end

  def self.is_flag_set?(flag_name)
    check_flag_name flag_name
    $redis.get("utilities:flags:#{flag_name}") == '1'
  end

  # Dynamic flag helpers: enable_<flag>, disable_<flag> and <flag>_enabled?.
  def self.method_missing(meth, *args, &block)
    if meth.to_s =~ /^enable_(.+)$/
      set_flag $1
    elsif meth.to_s =~ /^disable_(.+)$/
      unset_flag $1
    elsif meth.to_s =~ /^(.+)_enabled\?$/
      is_flag_set? $1
    else
      super
    end
  end

  # FIX: method_missing without respond_to_missing? leaves respond_to? and
  # Object#method blind to the dynamic flag helpers — mirror the patterns.
  def self.respond_to_missing?(meth, include_private = false)
    meth.to_s =~ /^enable_.+$|^disable_.+$|^.+_enabled\?$/ ? true : super
  end

  def self.is_author_asin?(asin)
    # This is based on the assumption that all author asins are in this format "BXXXXXXXXX" where X can be any character or number
    asin.present? && asin.length == 10 && asin[0] == 'B' && asin.scan(/[[:alnum:]]/).length == asin.length
  end

  # Dumps a scraped page (Page object or raw HTML string) to S3 for debugging.
  def self.upload_page_to_s3(page, name)
    html = page.is_a?(Page) ? page.dom : page
    file = File.new("/tmp/amazon-#{Time.now.to_s.parameterize}#{name.parameterize}.html", 'w')
    file.syswrite(html)
    uploader = HtmlPageUploader.new
    uploader.store! file
  end

  # True when any of the given job classes has a queued sidekiq job.
  def self.job_class_on_queue?(job_klasses)
    Array.wrap(job_klasses).any? do |job_klass|
      Sidekiq::Queue.new(job_klass.to_s.constantize.sidekiq_options_hash['queue']).any? {|job| job.klass == job_klass.to_s}
    end
  end

  # True when any of the given job classes sits in the retry set with more
  # than retry_count_threshold retries.
  def self.job_class_in_retry_set?(job_klasses, retry_count_threshold = -1)
    Sidekiq::RetrySet.new.any? {|job| Array.wrap(job_klasses).any? {|job_klass| job_klass.to_s == job.klass} && job['retry_count'] > retry_count_threshold}
  end

  # True when any of the given job classes is currently being worked on.
  def self.job_class_being_processed?(job_klasses)
    Sidekiq.redis do |conn|
      conn.smembers('workers').map do |w|
        msg = conn.get("worker:#{w}")
        msg ? Sidekiq.load_json(msg)['payload']['class'] : nil
      end.compact.uniq
    end.any? {|klass| Array.wrap(job_klasses).any? {|job_klass| job_klass.to_s == klass}}
  end

  # Queued, retrying or running — anywhere in sidekiq.
  def self.class_in_sidekiq?(job_klasses)
    job_class_on_queue?(job_klasses) || job_class_in_retry_set?(job_klasses) || job_class_being_processed?(job_klasses)
  end

  # Depth of an "A > B > C" category path (number of '>' separators).
  def self.get_depth_from_category_name(name)
    name.count '>'
  end

  # Classifies a product key as :bn_id, :isbn10, :isbn13 or :asin
  # (nil for nil input or unrecognized keys).
  def self.determine_key_type(key)
    if key.nil?
      nil
    elsif key.starts_with?('294') && key.length == 13
      :bn_id
    elsif ISBN_Tools.is_valid_isbn10?(key)
      :isbn10
    elsif ISBN_Tools.is_valid_isbn13?(key)
      :isbn13
    elsif key.length == 10
      :asin
    end
  end

  def self.is_isbn?(key)
    [:isbn13, :isbn10].include? determine_key_type(key)
  end

  # Collects plausible ASINs (first metadata field, 10 chars) as URL hints.
  def self.get_url_hints_from_metadata(metadatas)
    metadatas.collect do |metadata|
      asin = metadata.present? ? metadata[0].presence : nil
      asin.present? && asin.length == 10 ? asin : nil
    end.compact
  end

  # B&N's numeric format code for a book format (nil when unmapped).
  def self.bn_format_code(book_format)
    case book_format
    when 'Hardcover'
      '1519'
    when 'Paperback'
      '1521'
    when 'NOOK Book'
      '2734'
    else
      nil
    end
  end

  # Lightweight tagged stdout logging, production/staging only.
  def self.log(tag, text)
    p "[#{tag}] - #{text}" if env == 'production' || env == 'staging'
  end

  # Heroku dyno type, e.g. "worker" from "worker.3" (nil outside prod/staging).
  def self.process_type
    ENV['DYNO'].split('.').first if Utilities.env == 'production' || Utilities.env == 'staging'
  end

  # Heroku dyno number, e.g. "3" from "worker.3" (nil outside prod/staging).
  def self.dyno_id
    ENV['DYNO'].split('.').last if Utilities.env == 'production' || Utilities.env == 'staging'
  end
end
-
1
# Boolean checks used to validate scraped search/product pages and keys
# before a book version is ingested or billed as an exception.
module Validations
  # Amazon search returned zero result links.
  def self.amazon_not_found_in_search?(page)
    page.scrape_search_results_urls.blank?
  end

  # Amazon search returned several results, none of which matches a hinted ASIN.
  def self.amazon_ambiguous_search_results?(page, url_hints)
    urls = page.scrape_search_results_urls
    return false if urls.blank? || urls.count <= 1

    ScraperUtilities.match_url_with_asins(urls, url_hints).blank?
  end

  # Product page shows no Amazon price.
  def self.amazon_no_price?(page)
    page.scrape_amazon_price.blank?
  end

  # Product page has no cover image.
  def self.amazon_no_image?(page)
    !page.book_image_exists?
  end

  # Product page has no buy button.
  def self.amazon_no_buy_button?(page)
    !page.buy_button_exists?
  end

  # Key is neither a valid ISBN-10 nor a valid ISBN-13.
  def self.no_isbn?(key)
    !Utilities.is_isbn?(key)
  end

  # B&N search came back empty.
  def self.bn_not_found_in_search?(search_page)
    search_page.no_results?
  end

  # iTunes has no metadata for this key.
  def self.apple_invalid?(key)
    ItunesApi.get_metadata_by_isbn13(key) == {}
  end
end
-
1
# Minimal hand-rolled serializer: subclasses assign @valid_keys to the list
# of attribute names to expose, and as_json emits one hash per object.
class BaseSerializer
  def initialize(object)
    @object = object
    @valid_keys = []
  end

  # Serializes a single object, or each element of anything array-like
  # (responds to to_ary), into hash(es) keyed by @valid_keys.
  def as_json(options = {})
    return select_keys(@object) unless @object.respond_to?(:to_ary)

    @object.collect {|obj| select_keys obj}
  end

  private

  # Builds {key => object.key} for every whitelisted key.
  def select_keys(object)
    @valid_keys.each_with_object(HashWithIndifferentAccess.new) do |key, hash|
      hash[key] = object.send(key)
    end
  end
end
-
1
# API serializer for BookVersionCategory records.
class BookVersionCategorySerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = ['id', 'category_name', 'warehouse_book_version_id']
  end
end
-
1
# API serializer for WarehouseBookVersion records.
class BookVersionSerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = %w[id asin bn_id book_format isbn13 pub_date status title publisher sold_by pages
                     physical_details author_name author_asin itunes_id tld duplicate_key source
                     itunes_pub_date itunes_genres canonical_amazon_url canonical_bn_url
                     canonical_goodreads_url amazon_book_description]
  end
end
-
1
# API serializer for warehouse category records.
class CategorySerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = %w[id category_id depth name category_type status tld
                     parent_id canonical_category_id]
  end
end
-
1
# API serializer for per-category daily rank statistics.
class CategoryStatSerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = %w[id best_rank worst_rank book_version_count mean_rank median_rank
                     category_name best_rank_book_version_id worst_rank_book_version_id
                     warehouse_region_id warehouse_date_id warehouse_category_id date tld]
  end
end
-
1
# API serializer for bestseller-list statistics.
class ListStatSerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = %w[id warehouse_book_version_id warehouse_category_id warehouse_date_id
                     warehouse_trend_id days_in_top_100 name rank price author title
                     asin isbn bn_id itunes_id list_type]
  end
end
-
1
# API serializer for product stats: exposes every warehouse stat field plus
# a few identifying/similar-item columns.
class ProductStatSerializer < BaseSerializer
  def initialize(object)
    super(object)
    @valid_keys = WarehouseStat::WAREHOUSE_STAT_FIELDS +
                  %w[id date tld asin isbn13 amazon_similar_item_category_names
                     amazon_similar_item_category_external_ids]
  end
end
-
1
# Sidekiq workers that pull product data from the Amazon Product API.
module AmazonApiWorkers
  # Fetches API items for a batch of book versions in one bulk call and
  # kicks off ingestion for any version that becomes ready.
  class GetManyApiResponses
    BATCH_SIZE = 10
    include Sidekiq::Worker
    sidekiq_options queue: :ingest_low

    # book_version_ids - ids of WarehouseBookVersion rows to refresh
    # key_type         - attribute name used as the API lookup key (e.g. asin)
    # tld              - marketplace, e.g. ".com"
    def perform(book_version_ids, key_type, tld)
      # uncached so repeated identical queries in long batches hit the db
      ActiveRecord::Base.connection.uncached do
        # NOTE(review): this silences the AR logger process-wide, not just
        # for this job — confirm that is intentional.
        ActiveRecord::Base.logger.level = Logger::FATAL
        warehouse_book_versions = WarehouseBookVersion.where(id: book_version_ids, tld: tld)
        if warehouse_book_versions.present?
          # One bulk API call for the whole batch, keyed by asin/isbn/etc.
          items_by_key = AmazonApi.get_all_items_by_keys_and_tld warehouse_book_versions.collect(&:"#{key_type}"), key_type, tld

          warehouse_book_versions.each do |warehouse_book_version|
            warehouse_book_version.update_amazon_api_response items: items_by_key[warehouse_book_version.send key_type]
            BookVersionWorkers::Ingest.perform_async warehouse_book_version.id if warehouse_book_version.status == :ready_for_amazon_ingestion
          end
        end
      end
    end
  end
end
-
1
module BackupWorkers
-
1
# Nightly Mongo backup rotation: drops the week-old daily collections,
# then ships today's daily collections to the backup store.
class NightlyMongo
  include Sidekiq::Worker
  sidekiq_options queue: :sync

  # date_string - the day to back up, defaults to today (ISO date string).
  def perform(date_string = Date.current.to_s)
    Rails.logger.tagged('backup') {Rails.logger.info "Creating Mongo Backup at #{Time.current}"}
    date = date_string.to_date

    # Drop all collections from 7 days ago + 7 days ago author_page_data
    collection_types.each do |collection_type|
      MongoUtilities.daily_collection(collection_type, date - 7.days).drop
    end
    Rails.logger.tagged('backup') {Rails.logger.info "Mongo old collections dropped at #{Time.current}"}

    # Backup all daily stats collections + list stat data + exceptions
    # (can add author page, de_competitive collections later).
    todays_collections = collection_types.map do |collection_type|
      MongoUtilities.daily_collection(collection_type, date)
    end

    # run backup on collections
    HerokuMongoBackup::Backup.new(date).backup(todays_collections)
    Rails.logger.tagged('backup') {Rails.logger.info "Mongo Backup completed at #{Time.current}"}
  end

  # The daily collection types subject to rotation and backup.
  def collection_types
    MongoUtilities::DAILY_COLLECTION_TYPES
  end
end
-
-
1
# Restores a Mongo backup archive from an S3 file location.
class RestoreMongoBackup
  include Sidekiq::Worker
  sidekiq_options queue: :superhighmem

  # file_location - S3 URL of the backup archive; remember to make sure
  # the file is public before doing this.
  def perform(file_location)
    Rails.logger.tagged('backup') do
      Rails.logger.info "Restoring Mongo Backup #{file_location} at #{Time.current}"
    end

    HerokuMongoBackup.load_from_file(file_location)

    Rails.logger.tagged('backup') do
      Rails.logger.info "Mongo Restore completed at #{Time.current}"
    end
  end
end
-
-
1
class RestoreAndBackfillCategoryStats
-
1
include Sidekiq::Worker
-
1
sidekiq_options queue: :sync
-
1
KEY = 'category_stats_restore_stuff'
-
1
STOP_KEY = 'dude_stop_it'
-
1
ETL_COUNT = 'restore_stuff_etl_count'
-
-
1
# Resumable restore-and-backfill state machine. The current position is a
# redis marker "<date>|<stage>" under KEY; each loop iteration advances one
# stage and re-reads the marker, walking backwards one day at a time until
# 2014-07-24. Setting STOP_KEY aborts at the next iteration boundary.
def perform
  marker = $redis.get(KEY)
  return unless marker.present?

  warehouse_region_com_id = WarehouseRegion.com.id
  warehouse_region_co_uk_id = WarehouseRegion.couk.id

  date_string, stage = marker.split('|')
  date = date_string.to_date
  while date.present? && !$redis.get(STOP_KEY)
    case stage
    when 'date-start'
      # Kick off the restore for this date's archive, then wait it out.
      BackupWorkers::RestoreMongoBackup.perform_async s3_url_from_date(date_string)
      $redis.set(KEY, "#{date_string}|restore-started")
      sleep 30 while Utilities.class_in_sidekiq?(BackupWorkers::RestoreMongoBackup)
      $redis.set(KEY, "#{date_string}|restore-finished")
    when 'restore-started'
      # Resumption path: we crashed mid-restore. If the restore job is still
      # running, wait; otherwise scrap partial data and redo the date.
      if Utilities.class_in_sidekiq?(BackupWorkers::RestoreMongoBackup)
        sleep 30 while Utilities.class_in_sidekiq?(BackupWorkers::RestoreMongoBackup)
        $redis.set(KEY, "#{date_string}|restore-finished")
      else
        cleanup_restored_data
        # NOTE(review): 'date-started' is not handled by any `when` branch
        # below — the loop would spin doing nothing. Looks like it should be
        # 'date-start'; confirm before changing.
        $redis.set(KEY, "#{date_string}|date-started")
      end
    when 'restore-finished'
      warehouse_date_id = WarehouseDate.where(date: date).first_or_create.id
      # Only category stats are re-ETL'd on this backfill pass.
      # NOTE(review): the re-assignment of warehouse_date_id to itself here
      # is a no-op — presumably just bundling the flag defaults.
      warehouse_date_id, etl_product_stats, etl_book_version_categories, etl_category_stats = warehouse_date_id, false, false, true

      collection = $mongodb.collection "stats_#{date_string.gsub('-', '')}-restored"

      # Pick the boundary ids of each ETL batch (first/last of every
      # BATCH_SIZE window over ingested book versions, ordered as varchar).
      ids = WarehouseBookVersion.connection.execute("SELECT t.id FROM (SELECT id::varchar(255), row_number() OVER(ORDER BY id::varchar(255) ASC) AS row_asc FROM warehouse_book_versions where status = 'ingested') t WHERE t.row_asc % #{EtlWorkers::NightlyStatsEtl::BATCH_SIZE} = 0 OR t.row_asc % #{EtlWorkers::NightlyStatsEtl::BATCH_SIZE} = #{EtlWorkers::NightlyStatsEtl::BATCH_SIZE - 1} OR t.row_asc = 1").values.flatten
      ids += WarehouseBookVersion.connection.execute("SELECT id::varchar(255) FROM warehouse_book_versions where status = 'ingested' order by id::varchar(255) DESC limit 1").values.flatten
      params = ids.uniq.each_slice(2).collect do |slice|
        [collection.name, slice.first, slice.last, warehouse_region_com_id, warehouse_region_co_uk_id, warehouse_date_id, date_string, etl_product_stats, etl_book_version_categories, etl_category_stats]
      end.compact

      # Remember how many ETL jobs we queued so completion can be verified.
      $redis.set(ETL_COUNT, params.count)
      Sidekiq::Client.push_bulk 'class' => EtlWorkers::NightlyStatsEtl, 'args' => params if params.present?
      $redis.set(KEY, "#{date_string}|etl-queued")
      sleep 30 while Utilities.class_in_sidekiq?(EtlWorkers::NightlyStatsEtl)
      # Verify every batch produced a category-stats document; otherwise
      # drop the partial output and retry the ETL from 'restore-finished'.
      if CategoryStatsCollection.new(date).find.count == $redis.get(ETL_COUNT).to_i
        $redis.set(KEY, "#{date_string}|etl-complete")
      else
        CategoryStatsCollection.new(date).drop
        $redis.set(KEY, "#{date_string}|restore-finished")
      end
    when 'etl-queued'
      # Resumption path for a crash after the ETL jobs were queued.
      sleep 30 while Utilities.class_in_sidekiq?(EtlWorkers::NightlyStatsEtl)
      if CategoryStatsCollection.new(date).find.count == $redis.get(ETL_COUNT).to_i
        $redis.set(KEY, "#{date_string}|etl-complete")
      else
        CategoryStatsCollection.new(date).drop
        $redis.set(KEY, "#{date_string}|restore-finished")
      end
    when 'etl-complete'
      # Roll the per-batch output up into final category stats for the day.
      EtlWorkers::CategoryStatsEtl.perform_async date_string
      $redis.set(KEY, "#{date_string}|category-stats-etl-started")
      sleep 30 while Utilities.class_in_sidekiq?(EtlWorkers::CategoryStatsEtl)
      $redis.set(KEY, "#{date_string}|category-stats-etl-completed")
    when 'category-stats-etl-started'
      # Resumption path for a crash during the category-stats rollup.
      sleep 30 while Utilities.class_in_sidekiq?(EtlWorkers::CategoryStatsEtl)
      $redis.set(KEY, "#{date_string}|category-stats-etl-completed")
    when 'category-stats-etl-completed'
      cleanup_restored_data
      $redis.set(KEY, "#{date_string}|restore-cleaned")
    when 'restore-cleaned'
      # Step back one day, or stop once we pass the earliest backup date.
      # NOTE(review): after $redis.del(KEY) the marker re-read below returns
      # nil and nil.split raises — the loop exits via exception. Confirm
      # whether a clean `break` was intended.
      '2014-07-24'.to_date > date ? $redis.del(KEY) : $redis.set(KEY, "#{(date - 1.day).to_s}|date-start")
    end

    # Re-read the marker set above and continue with the next stage.
    marker = $redis.get(KEY)
    date_string, stage = marker.split('|')
    date = date_string.to_date
  end
end
-
-
1
# Maps a backup date (ISO "YYYY-MM-DD" string) to the S3 URL of that
# night's Mongo backup archive. Returns nil for dates with no known backup.
def s3_url_from_date(date_string)
  backup_urls = {
    '2014-07-24' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-24--2014-07-24%7C08%3A35%3A54.gz',
    '2014-07-25' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-25--2014-07-25%7C08%3A40%3A23.gz',
    '2014-07-26' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-26--2014-07-26%7C08%3A13%3A30.gz',
    '2014-07-27' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-27--2014-07-27%7C08%3A18%3A35.gz',
    '2014-07-28' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-28--2014-07-28%7C08%3A35%3A00.gz',
    '2014-07-29' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-29--2014-07-29%7C08%3A45%3A53.gz',
    '2014-07-30' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-30--2014-07-30%7C08%3A41%3A32.gz',
    '2014-07-31' => 'https://s3.amazonaws.com/booklr-production/backups/2014-07-31--2014-07-31%7C08%3A56%3A40.gz',
    '2014-08-01' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-01--2014-08-01%7C09%3A49%3A21.gz',
    '2014-08-02' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-02--2014-08-02%7C12%3A22%3A31.gz',
    '2014-08-03' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-03--2014-08-03%7C09%3A10%3A47.gz',
    '2014-08-04' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-04--2014-08-04%7C14%3A13%3A11.gz',
    '2014-08-05' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-05--2014-08-05%7C16%3A29%3A29.gz',
    '2014-08-06' => 'https://s3.amazonaws.com/booklr-production/backups/2014-08-06--2014-08-06%7C14%3A49%3A56.gz'
  }
  backup_urls[date_string]
end
-
-
1
# Drops every Mongo collection whose name ends in '-restored', i.e. the
# temporary collections created by a backup restore pass.
def cleanup_restored_data
  $mongodb.collection_names.select {|x| x.ends_with? '-restored'}.each do |name|
    $mongodb.collection(name).drop
  end
end
-
end
-
end
-
1
module BigDataReports
-
1
# Expands one ReportBatch into individual report documents: records that
# cannot be processed get an :unable_to_process report with a reason, valid
# ones get a :processing report and a worker job pushed in bulk.
class QueueReportBatch
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # report_id - BSON id string of the ReportBatch to expand.
  def perform(report_id)
    batch_document = ReportBatch.find(report_id)
    batch_id = BSON::ObjectId.from_string(report_id)
    batch_parameters = batch_document.batch_params

    # Ingested .com versions that actually have stats for the batch date.
    isbn_warehouse_book_versions = WarehouseBookVersion.ingested.com.where(isbn13: batch_parameters['isbn13s']).joins(:warehouse_stats).where{warehouse_stats.warehouse_date_id == batch_parameters['warehouse_date_id']}
    asin_warehouse_book_versions = WarehouseBookVersion.ingested.com.where(asin: batch_parameters['asins']).joins(:warehouse_stats).where{warehouse_stats.warehouse_date_id == batch_parameters['warehouse_date_id']}

    # Requested identifiers that did not resolve to a usable version above.
    invalid_isbn13s = batch_parameters['isbn13s'] - isbn_warehouse_book_versions.collect(&:isbn13)
    invalid_asins = batch_parameters['asins'] - asin_warehouse_book_versions.collect(&:asin)
    invalid_book_versions = WarehouseBookVersion.com.where{(asin.in invalid_asins) | (isbn13.in invalid_isbn13s)}
    # NOTE(review): the `else []` fallback means an unknown job_type raises
    # NoMethodError at the first `klass.create` — confirm whether an explicit
    # raise with a clear message was intended.
    klass = case batch_document.job_type
    when 'BigDataReports::Top100ProjectedRankReportWorker'
      Top100ProjectedRankReport
    when 'BigDataReports::SimilarBookPricingReportWorker'
      SimilarBookPricingReport
    else
      []
    end
    # Identifiers we have never seen at all -> unable_to_process reports.
    (invalid_asins - invalid_book_versions.collect(&:asin)).each do |missing_asin|
      klass.create job_type: batch_document.job_type,
                   asin: missing_asin,
                   warehouse_date_id: batch_parameters['warehouse_date_id'],
                   batch_ids: [batch_id],
                   status: :unable_to_process,
                   error: "ASIN #{missing_asin} is not currently in our system"
    end
    (invalid_isbn13s - invalid_book_versions.collect(&:isbn13)).each do |missing_isbn13|
      klass.create job_type: batch_document.job_type,
                   isbn13: missing_isbn13,
                   warehouse_date_id: batch_parameters['warehouse_date_id'],
                   batch_ids: [batch_id],
                   status: :unable_to_process,
                   error: "ISBN13 #{missing_isbn13} is not currently in our system"
    end
    # Versions we know about but cannot report on -> explain why.
    invalid_book_versions.each do |invalid_book_version|
      error_msg = case
      when !invalid_book_version.ingested?
        "Book version is in invalid state (#{invalid_book_version.status})"
      when invalid_book_version.warehouse_stats.blank?
        'Book version is valid but has no stats, it was most likely ingested today. If this problem persists tomorrow, contact your friendly neighborhood support.'
      when invalid_book_version.warehouse_stats.where{warehouse_stats.warehouse_date_id == batch_parameters['warehouse_date_id']}.blank?
        'Book version is valid but has no data for today, let your big data helpers know so they can investigate'
      end
      klass.create job_type: batch_document.job_type,
                   asin: invalid_book_version.asin,
                   isbn13: invalid_book_version.isbn13,
                   warehouse_date_id: batch_parameters['warehouse_date_id'],
                   batch_ids: [batch_id],
                   status: :unable_to_process,
                   error: error_msg
    end

    # Valid versions -> one :processing report each, then one worker job
    # per report, pushed in a single bulk call.
    report_ids = []
    all_book_versions = isbn_warehouse_book_versions + asin_warehouse_book_versions
    all_book_versions.each do |book_version|
      report = klass.create job_type: batch_document.job_type,
                            asin: book_version.asin,
                            isbn13: book_version.isbn13,
                            warehouse_date_id: batch_parameters['warehouse_date_id'],
                            batch_ids: [batch_id],
                            status: :processing
      report_ids << report.id
    end
    # Each job receives its report id wrapped in an args array.
    job_params = report_ids.collect(&method(:Array))
    Report.start_batch(report_ids, batch_id)
    Sidekiq::Client.push_bulk('class' => batch_document.job_type.constantize, 'args' => job_params)

    # The batch document is consumed once fully expanded.
    batch_document.destroy
  end
end
-
-
1
# Generates one previously-created Top100ProjectedRankReport document.
class Top100ProjectedRankReportWorker
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # report_id - id of the Top100ProjectedRankReport to generate.
  def perform(report_id)
    Top100ProjectedRankReport.find(report_id).generate
  end
end
-
-
1
# Creates and generates the top-100 price distribution report for a date.
class Top100PriceDistributionReportWorker
  include Sidekiq::Worker
  include ReportUtilities
  sidekiq_options queue: :reporting

  # date_string - the report date as an ISO date string.
  def perform(date_string)
    date_id = WarehouseDate.find_by(date: date_string.to_date).id

    Top100PriceDistributionReport.create(
      warehouse_date_id: date_id,
      status: :processing
    ).generate
  end
end
-
-
1
# Generates one previously-created SimilarBookPricingReport document.
class SimilarBookPricingReportWorker
  include Sidekiq::Worker
  include ReportUtilities
  sidekiq_options queue: :reporting

  # report_id - id of the SimilarBookPricingReport to generate.
  def perform(report_id)
    SimilarBookPricingReport.find(report_id).generate
  end
end
-
-
1
class NewIdentification
-
1
include Sidekiq::Worker
-
1
include ReportUtilities
-
1
sidekiq_options queue: :reporting
-
-
1
# Builds the "new identification" CSV report: finds book versions matching
# the rating/review/page-count criteria that are projected (but not yet
# ranked) into a top-100 list, computes per-book trend statistics over the
# trailing 90 days, writes them as CSV rows and ships the file to S3.
def perform(report_date_string, amazon_average_rating, amazon_review_count, days_of_data_min, min_page_count, client_name, category_names)
  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{client_name.to_s.camelcase} Identification Report -----"}

  # NOTE(review): the client_name parameter is unconditionally overwritten
  # here, so every report is produced as :booklr — confirm intentional.
  client_name = :booklr
  # client_config is only referenced by the commented-out mailer below.
  client_config = AmazeBot.config[:reports][:clients][client_name]
  report_date = report_date_string.to_date

  # Books must have existed at least days_of_data_min days before the report.
  minimum_creation_date = (report_date - days_of_data_min.days).in_time_zone.to_s
  # Trailing 90-day window (inclusive of the report date).
  date_range = (report_date - 89.days)..report_date
  warehouse_dates = WarehouseDate.where(date: date_range).order(date: :asc)
  current_warehouse_date = warehouse_dates.last
  warehouse_date_ids = warehouse_dates.collect(&:id)

  warehouse_region_id = WarehouseRegion.com.id
  warehouse_categories = WarehouseCategory.amazon.com

  book_versions = identify_book_versions current_warehouse_date.id,
                                         category_names: category_names,
                                         minimum_creation_date: minimum_creation_date,
                                         minimum_amazon_average_rating: amazon_average_rating,
                                         minimum_amazon_review_count: amazon_review_count,
                                         min_page_count: min_page_count
  stats = current_warehouse_date.warehouse_stats.where(warehouse_book_version_id: book_versions.collect(&:id).uniq)
  rank_projections_by_book_version_id = WarehouseStat.top100_rank_projections current_warehouse_date, stats, false
  # Keep only books projected into a top-100 position they do not already hold.
  filtered_book_versions = book_versions.select {|book_version| rank_projections_by_book_version_id[book_version.id].any? {|_, projection_details| !projection_details[:currently_ranked] && projection_details[:position] <= 100}}
  filtered_book_version_ids = filtered_book_versions.collect(&:id)

  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{client_name.to_s.camelcase} Identification Complete, report generation starting -----"}

  report_hash = EnterpriseReports.generate_report_hash("#{client_name}-new-identification-report-#{report_date.strftime('%m%d%y')}", client_name)
  report_csv = EnterpriseReports.open_csv(report_hash)

  # Append header row
  header = ['Title', 'Author', 'ASIN', 'ISBN', 'Pub Date', 'Publisher', 'Sold By', 'Page Count', 'Days of Data',
            'Created Date', 'Total Days Since Creation', '90 Day Average Overall Rank', '30 Day Moving Average',
            '7 Day Moving Average', 'Trendline Growth %', 'R-Squared', '90 Day Overall Rank Growth Rate',
            '90 Day Overall Rank Volatility', 'Apple Number of Ratings', 'BN number of Ratings', 'Amazon Number of Likes',
            'Amazon Number of Ratings', 'Ratings Per Day Since Published', 'Reviews Per Day Over Last 90 Days',
            'Average Star Rating', '% of Ratings 4 or above', '#1 Similar Category', '#2 Similar Category', '#3 Similar Category',
            '#1 Sub Category', '#1 Sub Category Percentage', '#2 Sub Category', '#2 Sub Category Percentage', '#3 Sub Category',
            '#3 Sub Category Percentage', 'Current Sales Rank', 'Current Price', 'Product URL', 'Amazon Description',
            'Projected Top 100 Category Name and Rank']
  report_csv << header

  # Process books in blocks of 20 to bound the per-query stat volume.
  block_size = 20
  (filtered_book_version_ids.count / block_size + 1).times do |block_count|
    Rails.logger.tagged('enterprise') {Rails.logger.info "----- Generating #{client_name.to_s.camelcase} report for #{(block_size * block_count)..(block_size * (block_count + 1) - 1)} out of #{filtered_book_version_ids.count} book versions at #{Time.current}-----"}

    # Pull this block's 90 days of stats, ordered so `chunk` groups all of
    # one book's stats together in date order.
    WarehouseStat.select(WarehouseStat::WAREHOUSE_STAT_FIELDS + WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS).
      where(warehouse_book_version_id: filtered_book_version_ids[(block_size * block_count)..(block_size * (block_count + 1) - 1)]).
      where(warehouse_date_id: warehouse_date_ids, warehouse_region_id: warehouse_region_id).
      order(:warehouse_book_version_id, :warehouse_date_id).
      chunk{|el| el.warehouse_book_version_id}.each do |warehouse_book_version_id, warehouse_stats|

      book_version = filtered_book_versions.select {|warehouse_book_version| warehouse_book_version.id == warehouse_book_version_id}.first
      # Skip books that cannot support the trend computations below.
      if warehouse_stats.collect(&:amazon_sales_rank).compact.blank?
        Rails.logger.tagged('enterprise') {Rails.logger.info "skipping warehouse_book_version #{book_version.id}, no sales rank data"}
        next
      elsif warehouse_stats.count < days_of_data_min
        Rails.logger.tagged('enterprise') {Rails.logger.info "skipping warehouse_book_version #{book_version.id}, less than #{days_of_data_min} days of data"}
        next
      end

      oldest_stat, newest_stat = warehouse_stats.first, warehouse_stats.last

      first_stat_date = warehouse_dates.select{|warehouse_date| warehouse_date.id == oldest_stat.warehouse_date_id}.first.date
      last_stat_date = warehouse_dates.select{|warehouse_date| warehouse_date.id == newest_stat.warehouse_date_id}.first.date
      # Identity / metadata columns.
      row = [book_version.title, book_version.author_name, book_version.asin, book_version.isbn13, book_version.pub_date,
             book_version.publisher, book_version.sold_by, book_version.pages, warehouse_stats.count, book_version.created_at.to_date.to_s,
             (Date.current - book_version.created_at.to_date).to_i]

      # Regression Setup
      amazon_sales_ranks = warehouse_stats.collect(&:amazon_sales_rank).compact
      line_fit = LineFit.new
      valid = line_fit.setData((1..amazon_sales_ranks.count).to_a, amazon_sales_ranks)

      # Rank averages, trendline growth %, fit quality and growth rate.
      row += [amazon_sales_ranks.mean, amazon_sales_ranks.moving_average(30).last, amazon_sales_ranks.moving_average(7).last]
      row << (valid ? "#{((line_fit.forecast(1) - line_fit.forecast(amazon_sales_ranks.count)) / (line_fit.forecast(1).abs) * 100).round(2)}%" : nil)
      row << (valid ? line_fit.rSquared.round(3) : nil)
      row << Formulas.average_growth_rate(oldest_stat.amazon_sales_rank, newest_stat.amazon_sales_rank, last_stat_date - first_stat_date, :negative)

      # Volatility and per-store engagement counts.
      row += [amazon_sales_ranks.standard_deviation, newest_stat.itunes_rating_count, newest_stat.bn_review_count,
              newest_stat.amazon_likes, newest_stat.amazon_review_count]
      row << (newest_stat.amazon_review_count.present? && book_version.pub_date.present? ? newest_stat.amazon_review_count / (report_date - book_version.pub_date.to_date).to_f : nil)
      row << (oldest_stat.amazon_review_count.present? && newest_stat.amazon_review_count.present? ? (newest_stat.amazon_review_count - oldest_stat.amazon_review_count) / warehouse_stats.count.to_f : nil)

      # Star-rating quality: share of 4/5-star reviews out of all reviews.
      row << newest_stat.amazon_average_rating
      if newest_stat.amazon_review_count.present? && (newest_stat.five_star_count.present? || newest_stat.four_star_count.present?)
        top_count = (newest_stat.five_star_count || 0) + (newest_stat.four_star_count || 0)
        row << ((top_count.to_f / newest_stat.amazon_review_count) * 100).to_s + '%'
      else
        row << '0%'
      end

      # Top Similar Item Categories
      top_similar_item_categories = warehouse_stats.collect {|warehouse_stat| warehouse_stat.amazon_similar_item_category_names}.flatten.mode.reverse.first(3)
      row += EnterpriseReports.pad_serialized_data(top_similar_item_categories, 3) {|value| value}

      # Top Sub Categories
      top_subcategory_id_frequencies = warehouse_stats.collect {|warehouse_stat| (1..3).collect {|num| warehouse_stat.send "warehouse_amazon_category#{num}_id"}}.flatten.compact.frequencies.to_a.reverse.first(3)
      category_names_and_percentages = top_subcategory_id_frequencies.map {|warehouse_category_id, count| [warehouse_categories.find {|category| category.id == warehouse_category_id}.name, "#{count.to_f / warehouse_stats.count * 100}%"]}
      row += EnterpriseReports.pad_serialized_data(category_names_and_percentages, 6) {|values| values.flatten}

      # Current snapshot columns (price is stored in cents).
      row += [newest_stat.amazon_sales_rank, (newest_stat.amazon_price / 100.0 if newest_stat.amazon_price.present?), Urls.amazon_book_page(book_version.asin, '.com'), book_version.amazon_book_description]

      # Top 100 Projections
      rankable_projection_details = rank_projections_by_book_version_id[warehouse_book_version_id].select do |_, projection_details|
        !projection_details[:currently_ranked] && projection_details[:position] <= 100
      end

      rankable_projection_details.each do |_, projection_details|
        row << "#{projection_details[:name]} -- #{projection_details[:position]}"
      end

      report_csv << row
    end
  end

  report_csv.flush

  # Pass reports array to mailer and deliver
  EnterpriseReports.move_to_s3(client_name, report_csv)
  # EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports]["#{client_name}_identification"]).deliver

  report_csv.close

  Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{client_name.to_s.camelcase} Identification Report Delivered -----"}
end
-
-
1
# Selects candidate book versions for the identification report.
#
# warehouse_date_id - dimension id the book must have stats for
# options: :category_names, :minimum_creation_date,
#          :minimum_amazon_average_rating, :minimum_amazon_review_count,
#          :min_page_count — each filter is applied only when present.
#
# Returns an Array of WarehouseBookVersion records with excluded
# publishers/sellers (and, optionally, short books) removed.
def identify_book_versions(warehouse_date_id, options = {})
  category_names = options.delete :category_names
  minimum_creation_date = options.delete :minimum_creation_date
  minimum_amazon_average_rating = options.delete :minimum_amazon_average_rating
  minimum_amazon_review_count = options.delete :minimum_amazon_review_count
  min_page_count = options.delete :min_page_count
  relation = WarehouseBookVersion.joins(:warehouse_stats).where{warehouse_stats.warehouse_date_id == warehouse_date_id}
  if category_names.present?
    # Restrict to books belonging to any of the requested categories.
    book_version_categories = BookVersionCategory.where(category_name: category_names)
    relation = relation.where(id: book_version_categories.collect(&:warehouse_book_version_id)) if book_version_categories.present?
  end
  relation = relation.where{created_at < minimum_creation_date} if minimum_creation_date.present?
  relation = relation.where{warehouse_stats.amazon_average_rating > minimum_amazon_average_rating} if minimum_amazon_average_rating.present?
  relation = relation.where{warehouse_stats.amazon_review_count > minimum_amazon_review_count} if minimum_amazon_review_count.present?

  # In-memory exclusion filters (materializes the relation into an Array).
  warehouse_book_versions = relation.reject {|warehouse_book_version| EnterpriseReports::StatisticalReports::EXCLUDED_PUBLISHERS.include? warehouse_book_version.publisher}
  warehouse_book_versions = warehouse_book_versions.reject {|warehouse_book_version| EnterpriseReports::StatisticalReports::EXCLUDED_SOLD_BY.include? warehouse_book_version.sold_by}
  warehouse_book_versions = warehouse_book_versions.reject {|warehouse_book_version| warehouse_book_version.pages.to_i < min_page_count} if min_page_count

  warehouse_book_versions
end
-
end
-
end
-
1
module BookVersionValidationWorkers
-
1
# Fans all ingested book versions out into contiguous id blocks, queuing
# one QueueDiscoverAmazon404s job per block.
class QueueDiscoverAmazon404sBlocks
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # block_size - number of book versions covered by each queued block.
  def perform(block_size = RedisUtilities::BLOCK_SIZE)
    all_ids = WarehouseBookVersion.ingested.order(:id).value_of(:id)
    id_ranges = all_ids.each_slice(block_size).map do |id_slice|
      [id_slice.first, id_slice.last]
    end
    Sidekiq::Client.push_bulk('class' => BookVersionValidationWorkers::QueueDiscoverAmazon404s, 'args' => id_ranges)
  end
end
-
-
1
# Queues one DiscoverAmazon404s check per ingested book version in the
# given id range.
class QueueDiscoverAmazon404s
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # starting/ending_book_version_id - inclusive id bounds for the block.
  def perform(starting_book_version_id, ending_book_version_id)
    id_range = starting_book_version_id..ending_book_version_id
    args = WarehouseBookVersion.ingested.where(id: id_range).value_of(:id, :asin, :tld).map {|tuple| Array(tuple)}
    Sidekiq::Client.push_bulk('class' => MongoBookVersionExceptionWorkers::DiscoverAmazon404s, 'args' => args)
  end
end
-
-
1
# Splits every tracked book version (for users opted into validation) into
# id blocks and queues one QueueValidations job per block.
class QueueValidationBlocks
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # block_size - number of book versions per queued validation block.
  def perform(block_size = RedisUtilities::BLOCK_SIZE)
    user_ids = User.where(validate_tracked_book_versions: true).value_of :id
    book_version_ids = TrackedBookVersion.where(user_id: user_ids).order(:warehouse_book_version_id).uniq.value_of(:warehouse_book_version_id)
    Rails.logger.tagged('enterprise') {Rails.logger.info "--- Validations running on #{user_ids.count} users for #{book_version_ids.count} tracked isbns---"}
    params = book_version_ids.each_slice(block_size).map do |id_slice|
      [id_slice.first, id_slice.last, user_ids]
    end
    Sidekiq::Client.push_bulk('class' => BookVersionValidationWorkers::QueueValidations, 'args' => params)
  end
end
-
-
1
# For each tracked book version in an id block, assembles Amazon-search and
# Barnes & Noble validation jobs and pushes them in bulk.
class QueueValidations
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # starting/ending_book_version_id - inclusive id bounds for this block
  # user_ids - users whose tracked versions participate in validation
  def perform(starting_book_version_id, ending_book_version_id, user_ids)
    Rails.logger.tagged('enterprise') {Rails.logger.info "Validations running on the id block: #{starting_book_version_id} - #{ending_book_version_id}"}
    collection_name = MongoUtilities.daily_collection_name(:book_version_exceptions)
    # Raw SQL joining each tracked version to its book version; rows are
    # ordered so `chunk` below groups all trackings of one book together.
    sql = TrackedBookVersion.select([:warehouse_book_version_id, :metadata]).
      join_select(:inner, false, warehouse_book_version: [:asin, :tld, :status]).
      select('COALESCE(warehouse_book_versions.isbn13, warehouse_book_versions.asin) as isbn_or_asin').
      joins(:warehouse_book_version).
      where(user_id: user_ids, warehouse_book_version_id: starting_book_version_id..ending_book_version_id).
      order(:warehouse_book_version_id).to_sql
    amazon_params = TrackedBookVersion.connection.execute(sql).chunk {|res| res['warehouse_book_version_id']}.collect do |_, rows|
      base_row = rows.first
      # Combine the url hints from every user tracking this book.
      # NOTE(review): YAML.load on stored metadata — fine if metadata is
      # app-written only; confirm it never contains user-supplied YAML.
      metadatas = rows.collect do |row|
        row['metadata'].present? ? YAML.load(row['metadata']) : nil
      end
      url_hints = Utilities.get_url_hints_from_metadata metadatas

      [collection_name, base_row['warehouse_book_version_id'], base_row['isbn_or_asin'], base_row['warehouse_book_version_asin'],
       base_row['warehouse_book_version_tld'], base_row['warehouse_book_version_status'], url_hints]
    end
    Sidekiq::Client.push_bulk('class' => MongoBookVersionExceptionWorkers::ValidateAmazonSearch, 'args' => amazon_params)
    # B&N validation only needs collection, book version id and isbn/asin.
    bn_params = amazon_params.collect {|params| params.first(3)}
    Sidekiq::Client.push_bulk('class' => MongoBookVersionExceptionWorkers::ValidateBarnesAndNoble, 'args' => bn_params)
  end
end
-
-
1
# Splits ingested tracked book versions (for users opted into iTunes
# validation) into id blocks and queues one QueueItunesValidations per block.
class QueueItunesValidationBlocks
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # block_size - number of book versions per queued validation block.
  def perform(block_size = RedisUtilities::BLOCK_SIZE)
    user_ids = User.where(validate_tracked_book_versions_on_itunes: true).value_of :id
    book_version_ids = TrackedBookVersion.where(user_id: user_ids).joins(:warehouse_book_version).where{warehouse_book_version.status == 'ingested'}.order(:warehouse_book_version_id).uniq.value_of(:warehouse_book_version_id)
    params = book_version_ids.each_slice(block_size).map do |id_slice|
      [id_slice.first, id_slice.last, user_ids]
    end
    Sidekiq::Client.push_bulk('class' => BookVersionValidationWorkers::QueueItunesValidations, 'args' => params)
  end
end
-
-
1
# Queues iTunes validation jobs for an id block of tracked book versions.
class QueueItunesValidations
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # iTunes validations only apply to titles that are ingested, are Kindle
  # ebooks and carry a real ISBN (originally: "part of rhincactive list" —
  # NOTE(review): garbled in source, presumably a client list; confirm).
  def perform(starting_book_version_id, ending_book_version_id, user_ids)
    collection_name = MongoUtilities.daily_collection_name(:book_version_exceptions)
    # Join each tracked version to format/status so eligibility can be
    # decided from the result rows alone; ordered for chunking below.
    sql = TrackedBookVersion.select(:warehouse_book_version_id).
      join_select(:inner, false, warehouse_book_version: [:book_format, :status]).
      select('COALESCE(warehouse_book_versions.isbn13, warehouse_book_versions.asin) as isbn_or_asin').
      joins(:warehouse_book_version).
      where(user_id: user_ids, warehouse_book_version_id: starting_book_version_id..ending_book_version_id).
      order(:warehouse_book_version_id).to_sql
    params = TrackedBookVersion.connection.execute(sql).chunk {|res| res['warehouse_book_version_id']}.collect do |_, rows|
      base_row = rows.first
      # Only ingested Kindle editions with an actual ISBN are validated;
      # ineligible books yield nil and are dropped by the compact below.
      if base_row['warehouse_book_version_status'] == 'ingested' && base_row['warehouse_book_version_book_format'].include?('Kindle Edition') && Utilities.is_isbn?(base_row['isbn_or_asin'])
        [collection_name, base_row['warehouse_book_version_id'], base_row['isbn_or_asin']]
      end
    end.compact
    Sidekiq::Client.push_bulk('class' => MongoBookVersionExceptionWorkers::ValidateItunes, 'args' => params)
  end
end
-
-
1
# Validates a freshly-created book version against Amazon search results,
# moving it from :new to :validated or :invalid_on_amazon.
class ValidateNewBookVersion
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # book_version_id - id of the WarehouseBookVersion to validate.
  def perform(book_version_id)
    book_version = WarehouseBookVersion.find book_version_id
    # Only :new versions are eligible; anything else was already decided.
    return unless book_version.status == :new

    if book_version.isbn_or_asin.blank?
      # Nothing to search for — cannot exist on Amazon.
      book_version.update_attributes status: :invalid_on_amazon
    else
      ProxyUtilities.proxy_setup :amazon
      search_page = AmazonSearchPage.by_isbn_or_asin_and_tld book_version.isbn_or_asin, book_version.tld
      # handle_captcha is defined outside this view — NOTE(review): appears
      # to return falsy when a captcha blocks the scrape, in which case the
      # status is left at :new for a retry; confirm.
      if handle_captcha(search_page, 60, book_version_id)
        # Valid iff the search found it and the result is unambiguous given
        # the url hints collected from all users tracking this book.
        valid = !Validations.amazon_not_found_in_search?(search_page) && !Validations.amazon_ambiguous_search_results?(search_page, Utilities.get_url_hints_from_metadata(book_version.tracked_book_versions.collect(&:metadata)))

        book_version.update_attributes status: (valid ? :validated : :invalid_on_amazon)
      end
    end
  end
end
-
end
-
1
module BookVersionWorkers
-
1
# Creates a WarehouseBookVersion from an attribute hash unless a record
# already exists with any of the same identifiers on the same tld.
class Create
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # params - attribute hash for the new WarehouseBookVersion; the keys
  # isbn13/asin/bn_id/itunes_id/tld also drive the duplicate check.
  def perform(params)
    params = params.with_indifferent_access
    # Only identifying keys participate in the existence check.
    existence_params = params.dup.keep_if {|key, _| %w[isbn13 asin bn_id itunes_id tld].include? key.to_s}
    tld = existence_params.delete :tld
    arel_table = WarehouseBookVersion.arel_table
    # Check for WarehouseBookVersion where (asin = x AND tld = t) OR (isbn = y AND tld = t) OR (bn_id = z AND tld = t)
    conditions = existence_params.each_pair.collect {|key, value| arel_table[key].eq(value).and(arel_table[:tld].eq(tld))}
    # NOTE(review): if params carries no identifying keys, `reduce` yields
    # nil and `where(nil)` is unscoped — the create is then skipped whenever
    # any book version exists at all. Confirm callers always pass at least
    # one identifier.
    WarehouseBookVersion.create! params unless WarehouseBookVersion.where(conditions.reduce {|final_condition, condition| final_condition.or(condition)}).exists?
  end
end
-
-
1
# Applies an attribute hash (minus :id) to an existing book version.
class Update
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # params - attribute hash; :id selects the record, the rest is applied.
  def perform(params)
    attributes = params.with_indifferent_access
    book_version = WarehouseBookVersion.find(attributes[:id])
    book_version.update_attributes(attributes.except(:id))
  end
end
-
-
1
# Runs ingestion for one book version outside the AR query cache; versions
# deleted before the job runs are logged and skipped.
class Ingest
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # warehouse_book_version_id - id of the version to ingest.
  def perform(warehouse_book_version_id)
    ActiveRecord::Base.connection.uncached do
      begin
        WarehouseBookVersion.find(warehouse_book_version_id).ingest
      rescue ActiveRecord::RecordNotFound
        Rails.logger.tagged('book_data') {Rails.logger.info "Tried to ingest a book that no longer exists"}
      end
    end
  end
end
-
-
1
# Batches .com book versions validated from the top-100 lists into Amazon
# API lookup jobs of GetManyApiResponses::BATCH_SIZE each (at most 5 full
# batches per run).
class QueueValidatedTop100ApiCall
  include Sidekiq::Worker
  sidekiq_options queue: :ingest_high

  # wait_till_full - when non-zero, a trailing partial batch is not queued
  # and is left for a later run once it fills up.
  def perform(wait_till_full = 0)
    # Squeel block condition: status == 'validated_from_top_100s'.
    WarehouseBookVersion.com.where{status == 'validated_from_top_100s'}.limit(AmazonApiWorkers::GetManyApiResponses::BATCH_SIZE * 5).each_slice(AmazonApiWorkers::GetManyApiResponses::BATCH_SIZE) do |warehouse_book_versions_slice|
      AmazonApiWorkers::GetManyApiResponses.perform_async warehouse_book_versions_slice.collect(&:id), :asin, '.com' if wait_till_full == 0 || warehouse_book_versions_slice.count == AmazonApiWorkers::GetManyApiResponses::BATCH_SIZE
    end
  end
end
-
-
1
# Watches a scrape-queuing run: waits for the queuing jobs to finish,
# verifies the staged job count matches the expected count recorded in
# redis, then promotes the staging queue to the live scraping queue.
# Any mismatch short-circuits with an alert email instead.
class MonitorScrapeJobCount
  include Sidekiq::Worker
  sidekiq_options queue: :high

  # scrape_count_key    - redis key holding the expected number of jobs
  # worker_class_string - scraper worker class name; its sidekiq queue is
  #                       the target of the staging-queue promotion
  def perform(scrape_count_key, worker_class_string)
    return NotificationMailer.scraper_count_error('Scrape count was not properly updated in redis for comparison').deliver if RedisUtilities.get_count(scrape_count_key).blank? || RedisUtilities.get_count(scrape_count_key) == 0

    # Wait for all queuing jobs to drain, bailing out if they keep failing.
    scraping_jobs = [BookVersionWorkers::QueueNightlyScrape, BookVersionWorkers::QueueNightlyScrapeBlock,
                     BookVersionWorkers::QueueAmazonAuthorPageScrape, BookVersionWorkers::QueueAmazonAuthorPageScrapeBlock]
    while Utilities.class_in_sidekiq?(scraping_jobs)
      return NotificationMailer.scraper_count_error('Scrape queuing jobs are repeatedly failing').deliver if Utilities.job_class_in_retry_set?(scraping_jobs, 4)

      sleep(10)
    end

    # Compare expected count against what actually landed on staging.
    scraping_queue = Sidekiq::Queue.new worker_class_string.constantize.sidekiq_options_hash['queue']
    staging_queue = Sidekiq::Queue.new Sidekiq::Client.convert_to_staging_queue(scraping_queue.name)
    return NotificationMailer.scraper_count_error("Queuing failed, number of queued jobs (#{staging_queue.size}) does not match the scrape count in redis (#{RedisUtilities.get_count scrape_count_key})").deliver if RedisUtilities.get_count(scrape_count_key) != staging_queue.size

    # Atomically promote staging -> live by renaming the underlying redis
    # list and registering the live queue name with Sidekiq.
    Sidekiq.redis do |conn|
      conn.rename("queue:#{staging_queue.name}", "queue:#{scraping_queue.name}")
      conn.sadd 'queues', scraping_queue.name
    end

    NotificationMailer.scraper_count_error('Queue rename failed, no jobs on the scraping queue').deliver unless scraping_queue.size > 0
  end
end
-
-
1
class QueueNightlyScrape
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Kicks off the nightly stats scrape for one scope: prepares today's mongo
  # stats collection, records the expected job count in redis, and fans the
  # id list out as QueueNightlyScrapeBlock jobs of BLOCK_SIZE ids each.
  def perform(scope)
    Rails.logger.tagged("book_data_#{scope}") {Rails.logger.info "Stat run started at: #{Time.current}"}

    # Create today's mongo stats collection, then apply usePowerOf2Sizes to
    # improve storage/performance.
    $mongodb.command({'create' => MongoUtilities.daily_collection_name(:stats)})
    $mongodb.command({'collMod' => MongoUtilities.daily_collection_name(:stats), 'usePowerOf2Sizes' => true})

    ids = WarehouseBookVersion.send(scope).order(:id).value_of(:id)

    scrape_count_key = RedisUtilities.get_scrape_count_key(scope)
    RedisUtilities.set_count scrape_count_key, ids.count

    # One block job per BLOCK_SIZE-sized slice, addressed by its first id.
    block_args = ids.each_slice(RedisUtilities::BLOCK_SIZE).collect do |slice|
      [scope, slice.first, RedisUtilities::BLOCK_SIZE]
    end

    Sidekiq::Client.push_bulk('class' => BookVersionWorkers::QueueNightlyScrapeBlock, 'args' => block_args)
    BookVersionWorkers::MonitorScrapeJobCount.perform_async scrape_count_key, WarehouseBookVersion.scope_to_worker_class(scope).to_s

    Rails.logger.tagged("book_data_#{scope}") {Rails.logger.info "Results: #{ids.count} book versions queued into #{(ids.count / RedisUtilities::BLOCK_SIZE.to_f).ceil} collections."}
  end
end
-
-
1
class QueueNightlyScrapeBlock
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Stages the actual scrape jobs for one block of book versions starting at
  # starting_id, pushing raw SQL result rows as job args onto the staging
  # queue of the scope's worker class.
  def perform(scope, starting_id, block_size)
    Rails.logger.tagged("book_data_#{scope}") {Rails.logger.info "Queuing block of #{RedisUtilities::BLOCK_SIZE} starting from #{starting_id} at: #{Time.current}"}

    collection_name = MongoUtilities.daily_collection_name(:stats)
    projection = "'#{collection_name}' AS collection_name, asin || '-' || tld AS mongo_id, id, asin, COALESCE(isbn13, bn_id) AS ean, tld, itunes_id, book_format"
    # B&N and Goodreads scrapes additionally need their canonical URLs.
    case scope.to_s
    when 'bn_statable'        then projection += ', canonical_bn_url'
    when 'goodreads_statable' then projection += ', canonical_goodreads_url'
    end

    sql = WarehouseBookVersion.send(scope).select(projection).order(:id).where{id >= starting_id}.limit(block_size).to_sql
    rows = WarehouseBookVersion.connection.execute(sql)

    Sidekiq::Client.push_bulk_staged('class' => WarehouseBookVersion.scope_to_worker_class(scope), 'args' => rows.values) if rows.values.present?

    Rails.logger.tagged("book_data_#{scope}") {Rails.logger.info "Queuing block of #{RedisUtilities::BLOCK_SIZE} starting from #{starting_id} completed at: #{Time.current}"}
  end
end
-
-
1
class QueueAmazonAuthorPageScrape
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Schedules the author-page stat scrape: counts the distinct
  # (tld, author_asin) pairs, records that count in redis, and queues one
  # QueueAmazonAuthorPageScrapeBlock job per BLOCK_SIZE-sized block, then
  # starts the monitor that promotes the staging queue.
  def perform
    sql = WarehouseBookVersion.select('distinct on (tld, author_asin) author_asin').where.not(author_asin: nil).group(:tld, :author_asin).to_sql
    result = WarehouseBookVersion.connection.execute(sql)
    if result.count > 0
      scrape_count_key = RedisUtilities.get_scrape_count_key(:amazon_author_page)
      RedisUtilities.set_count scrape_count_key, result.count
      # Fixed off-by-one: the previous `(0..(count / size).floor)` inclusive
      # range queued an extra, empty block job whenever count was an exact
      # multiple of BLOCK_SIZE; an exclusive range over ceil(count / size)
      # yields exactly one job per non-empty block.
      values = (0...(result.count / RedisUtilities::BLOCK_SIZE.to_f).ceil).collect {|block_number| [block_number, RedisUtilities::BLOCK_SIZE]}

      Sidekiq::Client.push_bulk('class' => BookVersionWorkers::QueueAmazonAuthorPageScrapeBlock, 'args' => values)
      BookVersionWorkers::MonitorScrapeJobCount.perform_async scrape_count_key, MongoWorkers::GetAmazonAuthorPageStats.to_s
    end
  end
end
-
-
1
class QueueAmazonAuthorPageScrapeBlock
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Stages author-page stat scrapes for one block of distinct
  # (tld, author_asin) pairs, addressed by block_number * block_size offset.
  def perform(block_number, block_size)
    select_statement = "distinct on (tld, author_asin) '#{MongoUtilities.daily_collection_name(:stats)}' AS collection_name, array_agg(asin || '-' || tld) as mongo_ids, tld, author_asin"
    sql = WarehouseBookVersion.select(select_statement).where.not(author_asin: nil).group(:tld, :author_asin).order(:tld, :author_asin).offset(block_number * block_size).limit(block_size).to_sql
    rows = WarehouseBookVersion.connection.execute(sql)

    # Postgres returns array_agg as a literal like "{a-1,b-2}"; strip the
    # surrounding braces and split on commas to recover the mongo ids.
    job_args = rows.values.collect do |row|
      collection_name, mongo_ids, tld, author_asin = row
      [collection_name, mongo_ids[1..-2].split(','), tld, author_asin]
    end

    Sidekiq::Client.push_bulk_staged('class' => MongoWorkers::GetAmazonAuthorPageStats, 'args' => job_args) if job_args.present?
  end
end
-
-
1
class DownloadImage
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # Attaches the image at +url+ to the given book version, but only after a
  # probe request confirms the URL answers with HTTP 200.
  def perform(warehouse_book_version_id, url)
    ActiveRecord::Base.connection.uncached do
      book_version = WarehouseBookVersion.find warehouse_book_version_id
      next unless url.present? && Net::HTTP.get_response(URI.parse(URI.encode(url))).code == "200"

      begin
        book_version.remote_book_version_image_url = url
        book_version.save
      rescue CarrierWave::ProcessingError
        # Sometimes the file is written incorrectly to tmp and the download
        # just needs to be retried.
        BookVersionWorkers::DownloadImage.perform_async warehouse_book_version_id, url
      end
    end
  end
end
-
-
1
class PopulateWebData
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes the Amazon product page for seller, description and author ASIN,
  # then hands the collected attributes to the Update worker.
  def perform(warehouse_book_version_id, asin, tld, author_name)
    ProxyUtilities.proxy_setup :amazon
    page = AmazonProductPage.by_asin_and_tld asin, tld
    return unless handle_captcha(page, 60, warehouse_book_version_id, asin, tld, author_name)

    attributes = {
      id: warehouse_book_version_id,
      sold_by: page.scrape_sold_by,
      amazon_book_description: page.scrape_amazon_description,
      author_asin: page.scrape_author_asin
    }

    # When the product page carries no author ASIN, try to recover it from
    # the author page in a separate job.
    if attributes[:author_asin].blank? && page.scrape_author_page_url.present? && page.scrape_author_name.present?
      BookVersionWorkers::SetAuthorAsinFromAuthorPage.perform_async warehouse_book_version_id, page.scrape_author_page_url, page.scrape_author_name
    end

    BookVersionWorkers::Update.perform_async attributes
  end
end
-
-
1
class SetAuthorAsinFromAuthorPage
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes an Amazon author page looking for a link whose text matches the
  # scraped author name and, when one is found, stores that link's ASIN on
  # the book version via the Update worker.
  def perform(warehouse_book_version_id, author_page_url, scraped_author_name)
    ProxyUtilities.proxy_setup :amazon
    page = AmazonPage.new author_page_url

    if handle_captcha(page, 60, warehouse_book_version_id, author_page_url, scraped_author_name)
      params = {id: warehouse_book_version_id}
      author_links = page.dom.search(".//span[@class='ptBrand']/a") # old format
      # NOTE(review): the original labeled this selector "old format" too,
      # but `h3.newaps` looks like the newer page layout used as a fallback
      # when the old selector matches nothing — confirm.
      author_links = page.dom.css('h3.newaps span a') if author_links.blank?

      if author_links.present?
        # NOTE(review): there is no break after a match, so when several
        # links share the author's name the LAST matching link wins —
        # confirm that is intended (a `find` would take the first).
        author_links.each do |author_link|
          if author_link.text.strip == scraped_author_name
            author_link_href = author_link['href']
            # Assumes the ASIN sits in path segment 3 of the author URL
            # (e.g. /Author-Name/e/ASIN/...) — TODO confirm against live URLs.
            params[:author_asin] = author_link_href.split('/')[3]
          end
        end
      end

      BookVersionWorkers::Update.perform_async params if params[:author_asin].present?
    end
  end
end
-
-
1
class UpdateWebData
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Re-scrapes an Amazon product page and pushes the refreshed attributes
  # (seller, description, physical details, publisher, pub date, page count)
  # through the Update worker.
  def perform(warehouse_book_version_id, asin, tld)
    ProxyUtilities.proxy_setup :amazon
    page = AmazonProductPage.by_asin_and_tld asin, tld
    return unless handle_captcha(page, 60, warehouse_book_version_id, asin, tld)

    attributes = {
      id: warehouse_book_version_id,
      sold_by: page.scrape_sold_by,
      amazon_book_description: page.scrape_amazon_description,
      physical_details: page.physical_details,
      publisher: page.scrape_publisher,
      pub_date: page.scrape_pub_date,
      pages: page.scrape_page_count
    }
    BookVersionWorkers::Update.perform_async attributes
  end
end
-
-
1
class PopulateCanonicalUrls
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Resolves and stores the canonical B&N and Goodreads URLs for a .com book
  # version; other tlds are simply re-saved untouched.
  def perform(warehouse_book_version_id)
    ActiveRecord::Base.connection.uncached do
      ProxyUtilities.force_proxy
      book_version = WarehouseBookVersion.find warehouse_book_version_id

      if book_version.tld == '.com'
        book_version.canonical_bn_url = HttpHelper.get_canonical_bn_url book_version.isbn13, book_version.tld
        # Prefer a Goodreads lookup by ISBN when valid; fall back to the ASIN.
        if ISBN_Tools.is_valid? book_version.isbn_or_asin
          book_version.canonical_goodreads_url = HttpHelper.get_canonical_goodread_url book_version.isbn_or_asin
        end
        book_version.canonical_goodreads_url ||= HttpHelper.get_canonical_goodread_url book_version.asin
      end

      book_version.save!
    end
  end
end
-
-
1
class ScheduleGetItunesMetadata
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Bulk-enqueues GetItunesMetadata jobs for ingested Kindle editions that
  # carry an ISBN-13 but have no iTunes id yet.
  def perform
    ActiveRecord::Base.connection.uncached do
      candidates = WarehouseBookVersion.ingested.where{isbn13 != nil}.where(itunes_id: nil).where{(book_format == 'Kindle Edition') | (book_format == 'Kindle Edition with Audio/Video')}
      candidates.value_of(:id).each_slice(RedisUtilities::BLOCK_SIZE) do |id_block|
        Sidekiq::Client.push_bulk('class' => BookVersionWorkers::GetItunesMetadata, 'args' => id_block.collect { |id| Array(id) })
      end
    end
  end
end
-
-
1
class ScheduleGermanCompetitiveScrape
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Queues GermanCompetitiveCoverage jobs for the owning account's ingested
  # print/Kindle book versions, attaching the mongo work id (by isbn13) to
  # each [asin, book_format, isbn13] row.
  #
  # user_email - account whose books are scheduled; previously hard-coded,
  #              now a parameter defaulting to the same address so existing
  #              callers are unaffected.
  def perform(user_email = 'rhde@booklr.com')
    ActiveRecord::Base.connection.uncached do
      user = User.find_by_email(user_email)
      # Guard: a missing account previously raised NoMethodError on nil.
      if user.nil?
        Rails.logger.tagged('book_data') {Rails.logger.info "ScheduleGermanCompetitiveScrape: no user with email #{user_email}"}
        next
      end

      # Map isbn13 -> work_id from the mongo work-id collection.
      isbn_to_work_id = {}
      MongoUtilities.work_id_collection.find.each {|x| isbn_to_work_id[x['_id']] = x['work_id']}

      book_values = user.warehouse_book_versions.ingested.where(book_format: ['Paperback', 'Hardcover', 'Mass Market Paperback', 'Kindle Edition', 'Kindle Edition with Audio/Video', 'Board Book']).value_of(:asin, :book_format, :isbn13)

      # Append the work id (nil when unknown) to each row.
      values = book_values.map {|value_array| value_array + [isbn_to_work_id[value_array[2]]] }

      values.each_slice(10000) do |slice|
        Sidekiq::Client.push_bulk('class' => MongoWorkers::GermanCompetitiveCoverage, 'args' => slice)
      end
    end
  end
end
-
-
1
class GetItunesMetadata
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # Fetches iTunes metadata by ISBN-13 for a Kindle book version and stores
  # it, unless the returned iTunes id is missing or already claimed by
  # another record.
  def perform(warehouse_book_version_id)
    ActiveRecord::Base.connection.uncached do
      book_version = WarehouseBookVersion.find warehouse_book_version_id
      next unless book_version.isbn13.present? && book_version.book_format.include?('Kindle')

      metadata = ItunesApi.get_metadata_by_isbn13 book_version.isbn13
      if metadata[:itunes_id].present? && !WarehouseBookVersion.where(itunes_id: metadata[:itunes_id]).exists?
        book_version.update_attributes! metadata
      end
    end
  end
end
-
-
1
class QueueSyncMongoAsinList
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Splits all WarehouseBookVersion ids into 50k-wide slices and queues one
  # SyncMongoAsinList job per [first_id, last_id] range.
  def perform
    Rails.logger.tagged('sync') {Rails.logger.info 'Starting asin list sync to Mongo'}
    ranges = WarehouseBookVersion.order(:id).value_of(:id).each_slice(50000).collect do |slice|
      [slice.first, slice.last]
    end

    Sidekiq::Client.push_bulk('class' => BookVersionWorkers::SyncMongoAsinList, 'args' => ranges)
  end
end
-
-
1
class SyncMongoAsinList
  include Sidekiq::Worker
  sidekiq_options queue: :etl

  # Mirrors the (asin, tld) pairs of one id range of WarehouseBookVersions
  # into the mongo "all asin" list.
  def perform(starting_warehouse_book_version_id, ending_warehouse_book_version_id)
    ActiveRecord::Base.connection.uncached do
      # NOTE(review): `.compact` acts on the [asin, tld] pairs, which are
      # never nil themselves, so it looks like a no-op; if the intent was to
      # drop rows with a nil asin, the pair's first element would need to be
      # checked instead — confirm before changing.
      all_asin_documents = WarehouseBookVersion.where(id: starting_warehouse_book_version_id..ending_warehouse_book_version_id).value_of(:asin, :tld).uniq.compact.collect do |asin, tld|
        MongoUtilities.all_asin_document asin, tld
      end

      MongoUtilities.add_documents_to_all_asin_list all_asin_documents
    end
  end
end
-
-
1
class UpdateStatuses
  include Sidekiq::Worker
  sidekiq_options queue: :etl

  # Copies statuses staged in the BookVersionStatusCollection over to the
  # matching WarehouseBookVersion rows, then drops the staging collection.
  def perform
    collection = BookVersionStatusCollection.new

    # One update_all per distinct status, covering every id that carries it.
    collection.find.group_by { |record| record['status'] }.each_pair do |status, records|
      WarehouseBookVersion.where(id: records.collect { |record| record['_id'] }).update_all(status: status)
    end

    collection.drop
  end
end
-
end
-
1
module DataCleanupWorkers
-
1
class PopulateMissingIsbnsFromApi
  include Sidekiq::Worker
  sidekiq_options queue: :background

  # Looks up the book version (bypassing the query cache) and asks
  # DataCleanup to backfill its missing ISBN-13 from the API.
  def perform(book_version_id)
    ActiveRecord::Base.connection.uncached do
      DataCleanup.populate_missing_isbn13_from_api WarehouseBookVersion.find(book_version_id)
    end
  end
end
-
-
#class BackfillMissingData
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :background
-
#
-
# def perform(book_version_id)
-
# ActiveRecord::Base.connection.uncached do
-
# book_version = WarehouseBookVersion.find book_version_id
-
# DataCleanup.backfill_missing_data book_version
-
# end
-
# end
-
#end
-
-
#class PopulateEanFromAsin
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :background
-
#
-
# def perform(book_version_id)
-
# ActiveRecord::Base.connection.uncached do
-
# book_version = WarehouseBookVersion.find book_version_id
-
# ean = DataCleanup.find_ean_from_asin book_version
-
#
-
# # if EAN starts with 294 then it is a BN ID and we use bn_id, otherwise it is an isbn13
-
# if ean.present?
-
# if WarehouseBookVersion.where{((isbn13 == ean) | (bn_id == ean)) & (tld == '.com')}.exists?
-
# Rails.logger.tagged('book_data') {Rails.logger.info "ean match found for #{book_version.id} but key #{ean} already exists in db"}
-
# else
-
# key_type = Utilities.determine_key_type(ean)
-
# key_type == :bn_id ? book_version.update_attributes(bn_id: ean) : book_version.update_attributes(isbn13: ean)
-
# Rails.logger.tagged('book_data') {Rails.logger.info "found matching #{key_type}: #{ean} and setting #{key_type}"}
-
# end
-
# end
-
# end
-
# end
-
#end
-
end
-
1
module EnterpriseReports
-
1
module DailyReports
-
1
class RHPG
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds the daily RHPG stat reports (Amazon rank, Amazon price, B&N rank)
  # for each RHPG user account, comparing the day before the report date
  # against the report date itself. The CSVs are uploaded to S3; the email
  # is only sent when the produced row count is close enough to the number
  # of tracked asins (per EnterpriseReports.report_count_valid?).
  #
  # report_date_string - date of the report
  # deliver_email      - set false to build/upload without mailing
  def perform(report_date_string, deliver_email = true)
    report_date = report_date_string.to_date
    client_name = :rhpg
    client_config = AmazeBot.config[:reports][:clients][client_name]

    users = User.where(email: %w[rhpg@booklr.com randomhousecomps@booklr.com]).to_a

    users.each do |user|
      asins = user.warehouse_book_versions.ingested.value_of(:asin)
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- Report on #{asins.count} book versions for: #{user.name} -----"}

      # One CSV per report flavor, named report-type + date + user.
      amazon_rank_report_hash = EnterpriseReports.generate_report_hash("amazon-rank-#{report_date.strftime('%m%d%y')}-#{user.name.parameterize}", client_name)
      amazon_rank_csv = EnterpriseReports.open_csv(amazon_rank_report_hash)

      amazon_price_report_hash = EnterpriseReports.generate_report_hash("amazon-price-#{report_date.strftime('%m%d%y')}-#{user.name.parameterize}", client_name)
      amazon_price_csv = EnterpriseReports.open_csv(amazon_price_report_hash)

      bn_rank_report_hash = EnterpriseReports.generate_report_hash("bn-rank-#{report_date.strftime('%m%d%y')}-#{user.name.parameterize}", client_name)
      bn_rank_csv = EnterpriseReports.open_csv(bn_rank_report_hash)

      amazon_rank_header = ['Book Title', 'Author', 'ISBN/ASIN', 'Book Type', 'Category']
      other_header = ['Book Title', 'Author', 'ISBN/ASIN', 'Book Type']
      # Two-day window: the day before the report date, then the report date.
      days = (report_date.yesterday).upto(report_date).to_a
      warehouse_date_ids = WarehouseDate.where(date: days).order(:date).value_of :id
      days.each do |day|
        amazon_rank_header << day.strftime('%m/%d/%Y')
        other_header << day.strftime('%m/%d/%Y')
      end

      amazon_rank_header << 'Percent Change'
      other_header << 'Percent Change'
      # Append header row (base columns + one column per day + change)
      amazon_rank_csv << amazon_rank_header
      amazon_price_csv << other_header
      bn_rank_csv << other_header

      warehouse_book_version_ids = WarehouseBookVersion.com.where(asin: asins).value_of :id
      warehouse_region_id = WarehouseRegion.com.id
      row_count = 0

      # Pull stats in batches of 1000 book versions to keep the join bounded.
      (warehouse_book_version_ids.count / 1000 + 1).times do |count|
        sql = WarehouseStat.single_query_join_and_select(WarehouseStat::WAREHOUSE_STAT_FIELDS,
          {warehouse_book_version: %w[id title isbn13 book_format author_name]},
          {warehouse_amazon_sales_rank_category: %w[name],
           warehouse_amazon_category1: %w[name],
           warehouse_amazon_category2: %w[name],
           warehouse_amazon_category3: %w[name]}).where{warehouse_stats.warehouse_book_version_id.in warehouse_book_version_ids[(1000 * count)..(1000 * (count + 1) - 1)]}.where(warehouse_region_id: warehouse_region_id, warehouse_date_id: warehouse_date_ids).order(:warehouse_book_version_id, :created_at).to_sql

        # Rows come back ordered by book version id, so chunk groups each
        # version's stats together. For each day the LAST recorded stat wins.
        ActiveRecord::Base.connection.execute(sql).chunk {|el| el['warehouse_book_version_id']}.each do |_, stats|
          day_1_stat = stats.select {|stat| stat['warehouse_date_id'] == warehouse_date_ids.first.to_s}.last.try(:with_indifferent_access)
          day_2_stat = stats.select {|stat| stat['warehouse_date_id'] == warehouse_date_ids.last.to_s}.last.try(:with_indifferent_access)
          stats = [day_1_stat, day_2_stat]
          reference_stat = day_1_stat || day_2_stat
          base_row = [reference_stat[:warehouse_book_version_title], reference_stat[:warehouse_book_version_author_name], EnterpriseReports.isbn_output(reference_stat[:warehouse_book_version_isbn13]), reference_stat[:warehouse_book_version_book_format]]

          # Rank report: one row per rank category that has data on either
          # day. Columns 4-7: category name, day-1 rank, day-2 rank, change.
          amazon_rank_base_row = (base_row + [nil, nil, nil])
          [:amazon_sales, :amazon_category1, :amazon_category2, :amazon_category3].each do |method|
            if day_1_stat.present? && day_1_stat.send(:[], "#{method}_rank").present? || day_2_stat.present? && day_2_stat.send(:[], "#{method}_rank").present?
              el = day_1_stat || day_2_stat
              amazon_rank_stats_row = amazon_rank_base_row.dup
              amazon_rank_stats_row[4] = method == :amazon_sales ? el[:warehouse_amazon_sales_rank_category_name] : el.send(:[], "warehouse_#{method}_name")
              amazon_rank_stats_row[5] = day_1_stat.try :send, :[], "#{method}_rank"
              amazon_rank_stats_row[6] = day_2_stat.try :send, :[], "#{method}_rank"
              if amazon_rank_stats_row[5].present? && amazon_rank_stats_row[6].present?
                amazon_rank_stats_row[7] = "#{EnterpriseReports.percent_change_from_for_rank amazon_rank_stats_row[5].to_i, amazon_rank_stats_row[6].to_i}%"
              end
              amazon_rank_csv << amazon_rank_stats_row
            end
          end

          # Price and B&N rank reports share the base row; the B&N report
          # relabels Kindle editions as NOOK Book.
          amazon_price_stats_row = base_row.dup
          bn_rank_stats_row = base_row.dup
          bn_rank_stats_row[3] = (reference_stat[:warehouse_book_version_book_format].include?('Kindle') ? 'NOOK Book' : reference_stat[:warehouse_book_version_book_format])

          if stats[0].present?
            # amazon_price is stored in cents; convert to a dollar amount.
            amazon_price_stats_row[4] = stats[0][:amazon_price].present? ? stats[0][:amazon_price].to_i / 100.0 : nil
            bn_rank_stats_row[4] = stats[0][:bn_sales_rank]
          end

          if stats[1].present?
            amazon_price_stats_row[5] = stats[1][:amazon_price].present? ? stats[1][:amazon_price].to_i / 100.0 : nil
            bn_rank_stats_row[5] = stats[1][:bn_sales_rank]
          end

          amazon_price_stats_row[6] = "#{EnterpriseReports.percent_change_from amazon_price_stats_row[4], amazon_price_stats_row[5]}%" if amazon_price_stats_row[4].present? && amazon_price_stats_row[5].present?
          bn_rank_stats_row[6] = "#{EnterpriseReports.percent_change_from_for_rank bn_rank_stats_row[4].to_i, bn_rank_stats_row[5].to_i}%" if bn_rank_stats_row[4].present? && bn_rank_stats_row[5].present?

          amazon_price_csv << amazon_price_stats_row
          bn_rank_csv << bn_rank_stats_row
          row_count += 1
        end
      end

      # Upload finished report whether its complete or not
      EnterpriseReports.move_to_s3(client_name, amazon_rank_csv)
      EnterpriseReports.move_to_s3(client_name, amazon_price_csv)
      EnterpriseReports.move_to_s3(client_name, bn_rank_csv)

      # Determine if report is complete and then email about it and set redis details
      if EnterpriseReports.report_count_valid? row_count, asins.count
        $redis.hmset('daily_report_stats', 'rhpg-row-count', row_count, 'rhpg-send-time', Time.current.to_s)
        EnterpriseReportsMailer.basic_report([amazon_rank_report_hash, amazon_price_report_hash, bn_rank_report_hash], client_config[:reports][:daily_stats]).deliver if deliver_email
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{user.name.parameterize} Report Delivered -----"}
      else
        EnterpriseReports.send_report_count_error "#{client_name}-daily", row_count, asins.count
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{user.name.parameterize}(#{client_name}) Report NOT Delivered: row count off by 0.5% of more -----"}
      end

      amazon_rank_csv.close
      amazon_price_csv.close
      bn_rank_csv.close
    end
  end
end
-
end
-
end
-
1
module EnterpriseReports
-
1
module ExceptionReports
-
1
class RHPG
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds the daily RHPG exception reports — one CSV for books missing from
  # Amazon search and one for books missing from B&N search — then uploads
  # both to S3 and mails them to the client's configured recipients.
  def perform(user_email, date_string)
    Rails.logger.tagged('enterprise') {Rails.logger.info '--- Generating RHPG Exception Report ---'}
    user = User.find_by email: user_email
    report_date = date_string.to_date

    client_name = :rhpg
    client_config = AmazeBot.config[:reports][:clients][client_name]
    header = ['Book Title', 'Author', 'ISBN/ASIN']

    amazon_report_hash = EnterpriseReports.generate_report_hash("amazon-exception-report-#{report_date.strftime('%m%d%y')}-#{client_name}", client_name)
    amazon_csv = EnterpriseReports.open_csv(amazon_report_hash)
    amazon_csv << header

    bn_report_hash = EnterpriseReports.generate_report_hash("bn-exception-report-#{report_date.strftime('%m%d%y')}-#{client_name}", client_name)
    bn_csv = EnterpriseReports.open_csv(bn_report_hash)
    bn_csv << header

    warehouse_date_id = WarehouseDate.find_by(date: report_date).id

    user.tracked_book_versions.includes(:warehouse_book_version).find_each do |tracked|
      exceptions = tracked.warehouse_book_version.book_version_exceptions.where(warehouse_date_id: warehouse_date_id)
      amazon_csv << EnterpriseReports.get_rhpg_exception_report_row(tracked) if exceptions.where(amazon_not_found_in_search: true).exists?
      bn_csv << EnterpriseReports.get_rhpg_exception_report_row(tracked) if exceptions.where(bn_not_found_in_search: true).exists?
    end

    EnterpriseReports.move_to_s3(client_name, amazon_csv)
    EnterpriseReports.move_to_s3(client_name, bn_csv)
    EnterpriseReportsMailer.basic_report([amazon_report_hash, bn_report_hash], client_config[:reports][:daily_exception]).deliver

    amazon_csv.close
    bn_csv.close

    Rails.logger.tagged('enterprise') {Rails.logger.info '--- RHPG Exception Report Completed ---'}
  end
end
-
-
1
class RandomHouseCorporate
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds the daily RHINC exception report CSV, uploads it to S3, FTPs it
  # to the client and mails it to the configured recipients.
  def perform(user_email, date_string)
    Rails.logger.tagged('enterprise') {Rails.logger.info '--- Generating RHINC Exception Report ---'}
    user = User.find_by email: user_email
    report_date = date_string.to_date

    client_name = :rhinc
    client_config = AmazeBot.config[:reports][:clients][client_name]

    header = ['Book Title', 'Author', 'ISBN', 'ASIN', 'Book Format', 'Division Code', 'Timestamp', 'Missing On Amazon', 'Ambiguous Results', 'Buy Button Missing Amazon', 'Price Missing Amazon', 'Missing on BN', 'Missing on Itunes']

    report_hash = EnterpriseReports.generate_report_hash("exception-report-#{report_date.strftime('%m%d%y')}-#{client_name}", client_name)
    csv = EnterpriseReports.open_csv(report_hash)
    csv << header

    warehouse_date_id = WarehouseDate.find_by(date: report_date).id
    flagged_validations = [:bn_not_found_in_search, :no_isbn, :amazon_not_found_in_search, :amazon_no_buy_button, :amazon_no_price,
                           :amazon_ambiguous_result, :apple_invalid]

    user.tracked_book_versions.includes(warehouse_book_version: :book_version_exceptions).where('book_version_exceptions.warehouse_date_id = ?', warehouse_date_id).references(:book_version_exceptions).find_each do |tracked|
      exception = tracked.warehouse_book_version.book_version_exceptions.last

      # Output the row when any tracked validation failed; amazon image
      # validations are excluded since they are not shown in this report yet.
      if flagged_validations.any? {|method| exception.send method}
        csv << EnterpriseReports.get_rhinc_exception_report_row(tracked, exception)
      end
    end

    EnterpriseReports.move_to_s3(client_name, csv)
    EnterpriseReports.ftp_to_client(client_name, csv)
    EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:daily_exception]).deliver

    csv.close

    Rails.logger.tagged('enterprise') {Rails.logger.info '--- RHINC Exception Report Completed ---'}
  end
end
-
-
1
class Vook
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds the daily Vook exception report CSV (same shape as the RHINC
  # report, minus the FTP delivery), uploads it to S3 and mails it to the
  # client's configured recipients.
  def perform(user_email, date_string)
    Rails.logger.tagged('enterprise') {Rails.logger.info '--- Generating Vook Exception Report ---'}
    user = User.find_by email: user_email
    date = date_string.to_date

    client_name = :vook
    client_config = AmazeBot.config[:reports][:clients][client_name]

    exception_report_header = ['Book Title', 'Author', 'ISBN', 'ASIN', 'Book Format', 'Division Code', 'Timestamp', 'Missing On Amazon', 'Ambiguous Results', 'Buy Button Missing Amazon', 'Price Missing Amazon', 'Missing on BN', 'Missing on Itunes']

    file_name_ending = client_name

    report_hash = EnterpriseReports.generate_report_hash("exception-report-#{date.strftime('%m%d%y')}-#{file_name_ending}", client_name)
    exception_report_csv = EnterpriseReports.open_csv(report_hash)
    exception_report_csv << exception_report_header

    warehouse_date_id = WarehouseDate.find_by(date: date).id
    user.tracked_book_versions.includes(warehouse_book_version: :book_version_exceptions).where('book_version_exceptions.warehouse_date_id = ?', warehouse_date_id).references(:book_version_exceptions).find_each do |tracked_book_version|
      book_version_exception = tracked_book_version.warehouse_book_version.book_version_exceptions.last

      # If any of the validations show up as invalid, output this row, except do not include amazon image
      # validations since those arent shown yet in this report
      exception_report_csv << EnterpriseReports.get_rhinc_exception_report_row(tracked_book_version, book_version_exception) if
        [:bn_not_found_in_search, :no_isbn, :amazon_not_found_in_search, :amazon_no_buy_button, :amazon_no_price,
         :amazon_ambiguous_result, :apple_invalid].any? {|method| book_version_exception.send method}
    end

    EnterpriseReports.move_to_s3(client_name, exception_report_csv)
    EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:daily_exception]).deliver

    exception_report_csv.close

    # Fixed: this previously logged "--- RHINC Exception Report Completed ---",
    # a copy-paste left over from the RandomHouseCorporate worker.
    Rails.logger.tagged('enterprise') {Rails.logger.info '--- Vook Exception Report Completed ---'}
  end
end
-
end
-
end
-
1
module EnterpriseReports
-
1
module ListStatReports
-
1
# Per-client Amazon browse-node configuration for the top-100 list reports:
# each client maps to a set of :category_ids (and, for rhinc, a :names match
# against canonical category names) used to select WarehouseCategory rows.
AMAZON_CATEGORY_LISTS = {
  perseus: {category_ids: %w[2365 2376 2394 2396 2399 171115 11322 3639 3573 3887 280311 21 11970 11119 4810 4869 16244431 5015 4837 4861 4853 4884 4891 4935 4947 4952 4954 4939 4948 4956 5032 4978 15812171 197501011 5011 14450 5020 5026 5027 5028 5030 5031 5035 720360 10177 4682 10753 11320 11401 11019 11232 12292 12300 12350 12360 12735 13871 13884 16272]},
  libboo: {category_ids: %w[157055011 154607011 154754011 319635011 157052011 156154011 158576011 157050011 156576011 157060011 157305011 157430011 157078011 157626011 158125011 158280011 158566011 158597011 158591011 156295011]},
  booklr: {category_ids: %w[2365 2376]},
  pll: {category_ids: %w[3511261011 6110890011 6064558011 6064559011 6064561011 6064560011 6064565011 6110891011 6064562011 6064564011 6190488011]},
  rhinc: {names: 'Kindle Store > Kindle eBooks'}
}.freeze
-
-
1
class AmazonTop100
  include EnterpriseReports
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds a client's Amazon Top-100 CSV: one row per (category, date, rank),
  # annotated with the title's ranks in up to four other Amazon categories,
  # then uploads to S3, emails the report, and (for rhinc only) FTPs it to RHPG.
  #
  # client_name           - client key used for storage/email config (e.g. :rhinc)
  # report_name           - report key, dasherized into the file name
  # category_ids_or_names - hash with :category_ids and/or :names
  #                         (see AMAZON_CATEGORY_LISTS)
  # start_date_string / end_date_string - inclusive date range (date-parseable)
  def perform(client_name, report_name, category_ids_or_names, start_date_string, end_date_string)
    date_range = start_date_string.to_date..end_date_string.to_date
    warehouse_date_ids = WarehouseDate.where(date: date_range).order(:date).value_of :id
    category_ids_or_names = category_ids_or_names.with_indifferent_access
    warehouse_region_id = WarehouseRegion.com.id
    warehouse_category_ids = WarehouseCategory.com.canonical.where{(category_id.in category_ids_or_names[:category_ids]) | (name.in category_ids_or_names[:names])}.value_of :id
    # Every list carries exactly 100 ranks per category per day.
    expected_count = warehouse_date_ids.count * warehouse_category_ids.count * 100

    base_file_name = "#{client_name}-#{report_name.to_s.dasherize}-#{date_range.first.strftime("%m%d%y")}"
    report_file_name = date_range.count == 1 ? base_file_name : "#{base_file_name}-#{date_range.last.strftime("%m%d%y")}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name
    report_csv = EnterpriseReports.open_csv(report_hash)
    report_csv << ['List Name', 'List Rank', 'Author', 'Title', 'ASIN', 'Price', 'List Rank Date', 'Other category 1', 'Other category 1 Rank',
                   'Other category 2', 'Other category 2 Rank', 'Other category 3', 'Other category 3 Rank', 'Other category 4', 'Other category 4 Rank', 'Other category Rank Date']

    list_stat_sql = WarehouseListStat.single_query_join_and_select('*',
                                                                   {warehouse_date: %w[date],
                                                                    warehouse_category: %w[name category_id]},
                                                                   {warehouse_book_version: %w[id title asin author_name],
                                                                    warehouse_trend: %w[name]}).where{warehouse_category_id.in warehouse_category_ids}.where(warehouse_date_id: warehouse_date_ids).order('warehouse_category_name, warehouse_date_date DESC, rank').to_sql
    list_stat_results = ActiveRecord::Base.connection.execute(list_stat_sql)
    asins = list_stat_results.collect {|result| result["warehouse_book_version_asin"]}.compact.uniq
    warehouse_book_version_ids = WarehouseBookVersion.com.where(asin: asins).value_of(:id)

    warehouse_stat_sql = WarehouseStat.single_query_join_and_select('*',
                                                                   {warehouse_book_version: %w[id title asin],
                                                                    warehouse_date: %w[date]},
                                                                   {warehouse_amazon_sales_rank_category: %w[name],
                                                                    warehouse_amazon_category1: %w[name],
                                                                    warehouse_amazon_category2: %w[name],
                                                                    warehouse_amazon_category3: %w[name]}).where{warehouse_stats.warehouse_book_version_id.in warehouse_book_version_ids}.where(warehouse_date_id: warehouse_date_ids, warehouse_region_id: warehouse_region_id).to_sql
    warehouse_stat_results = ActiveRecord::Base.connection.execute(warehouse_stat_sql).collect{|result| result}

    # Index stats by ASIN once up front (first occurrence wins, mirroring the
    # previous Array#find) instead of re-scanning the whole stat set per row.
    warehouse_stats_by_asin = {}
    warehouse_stat_results.each do |warehouse_stat_result|
      asin = warehouse_stat_result["warehouse_book_version_asin"]
      warehouse_stats_by_asin[asin] = warehouse_stat_result unless warehouse_stats_by_asin.key?(asin)
    end

    list_stat_results.each do |list_stat_result|
      # Warehouse-joined columns win over the raw scraped list columns.
      row = [list_stat_result['warehouse_category_name'], list_stat_result['rank'],
             list_stat_result['warehouse_book_version_author_name'] || list_stat_result['author'],
             list_stat_result['warehouse_book_version_title'] || list_stat_result['title'],
             list_stat_result['warehouse_book_version_asin'] || list_stat_result['asin'],
             list_stat_result['price'].try(:to_i).try(:/, 100.0), list_stat_result['warehouse_date_date']] # price stored in cents
      warehouse_stat = warehouse_stats_by_asin[list_stat_result["asin"]]
      other_ranks = []
      if warehouse_stat.present?
        sales_rank_category_name = warehouse_stat['warehouse_amazon_sales_rank_category_name'] || warehouse_stat['warehouse_amazon_sales_rank_category_id_fallback']
        category1_name = warehouse_stat['warehouse_amazon_category1_name'] || warehouse_stat['warehouse_amazon_category1_id_fallback']
        category2_name = warehouse_stat['warehouse_amazon_category2_name'] || warehouse_stat['warehouse_amazon_category2_id_fallback']
        category3_name = warehouse_stat['warehouse_amazon_category3_name'] || warehouse_stat['warehouse_amazon_category3_id_fallback']
        # Skip whichever category duplicates the list this row already belongs to.
        other_ranks += [sales_rank_category_name, warehouse_stat['amazon_sales_rank']] unless sales_rank_category_name == list_stat_result['warehouse_category_name']
        other_ranks += [category1_name, warehouse_stat['amazon_category1_rank']] unless category1_name == list_stat_result['warehouse_category_name']
        other_ranks += [category2_name, warehouse_stat['amazon_category2_rank']] unless category2_name == list_stat_result['warehouse_category_name']
        other_ranks += [category3_name, warehouse_stat['amazon_category3_rank']] unless category3_name == list_stat_result['warehouse_category_name']
      end
      # Pad to exactly 8 cells (4 name/rank pairs) so columns stay aligned.
      row += EnterpriseReports.pad_serialized_data(other_ranks, 8) do |amazon_list_stat_sales_ranks|
        amazon_list_stat_sales_ranks
      end
      row += [list_stat_result['warehouse_date_date']]
      report_csv << row
    end

    report_csv.flush

    begin
      if EnterpriseReports.report_count_valid?(list_stat_results.count, expected_count)
        EnterpriseReports.move_to_s3(client_name, report_csv)
        EnterpriseReportsMailer.basic_report(report_hash, get_report_email_details(client_name, report_name)).deliver
        EnterpriseReports.ftp_to_rhpg(report_hash[:report_location]) if client_name.to_sym == :rhinc
      else
        EnterpriseReports.send_report_count_error report_file_name, list_stat_results.count, expected_count
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_file_name} Report NOT Delivered: row count off by 0.5% or more -----"}
      end
    rescue *HTTP_ERRORS => e
      Rails.logger.tagged('enterprise') {Rails.logger.info "Error trying to send top 100 amazon email or upload to FTP: #{e}"}
    ensure
      report_csv.close
    end
  end
end
-
-
1
class AmazonAllTop100
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Dumps every Amazon top-100 list row for the given date range straight from
  # SQL to CSV, uploads the file to S3, and always closes the file handle.
  # Both dates default to today when omitted.
  def perform(client_name, start_date_string = nil, end_date_string = nil)
    start_date = start_date_string.try(:to_date) || Date.current
    end_date = end_date_string.try(:to_date) || Date.current
    date_range = start_date..end_date
    warehouse_date_ids = WarehouseDate.where(date: date_range).order(:date).value_of :id

    # Single-day reports omit the end-date suffix from the file name.
    name_prefix = "amazon-all-top-100-#{date_range.first.strftime("%m%d%y")}"
    report_file_name = date_range.count == 1 ? name_prefix : "#{name_prefix}-#{date_range.last.strftime("%m%d%y")}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name

    # Column aliases double as the CSV header row produced by the COPY.
    list_stat_sql = WarehouseListStat.single_query_join_and_select('warehouse_category.name as "List Name", warehouse_list_stats.rank as "List Rank", warehouse_list_stats.asin as "ASIN", warehouse_list_stats.title as "Title", warehouse_list_stats.author as "Author", round(warehouse_list_stats.price / 100.0, 2) as "Price", warehouse_date.date as "List Rank Date"',
                                                                   {warehouse_date: [],
                                                                    warehouse_category: []},
                                                                   nil).where(warehouse_date_id: warehouse_date_ids).order('"List Name", "List Rank Date" DESC, rank').to_sql
    report_csv = EnterpriseReports.sql_copy_to_csv(:amazon_all_top_100, report_hash[:report_location], WarehouseListStat.connection.raw_connection, list_stat_sql)

    begin
      EnterpriseReports.move_to_s3(client_name, report_csv)
    rescue *HTTP_ERRORS => e
      Rails.logger.tagged('enterprise') {Rails.logger.info "Error trying to send top 100 amazon email or upload to FTP: #{e}"}
    ensure
      report_csv.close
    end
  end
end
-
-
1
class GenerateBarnesAndNobleBestSellersReport
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Writes the rhinc "NOOK Book bestsellers" top-100 CSV for one day, then
  # uploads to S3, FTPs to the client, and emails the report. The CSV handle
  # is always closed via ensure.
  def perform(report_date_string = Date.current.to_s)
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.where(date: report_date).value_of :id
    client_name = :rhinc
    client_config = AmazeBot.config[:reports][:clients][client_name]
    report_file_name = "top-100-barnes-and-noble-#{report_date.strftime("%m%d%y")}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name
    report_csv = EnterpriseReports.open_csv(report_hash)
    report_csv << ['Rank', 'Author', 'Title', 'ISBN', 'BN ID', 'Price', 'Date']
    row_count = 0

    WarehouseListStat.where(warehouse_date_id: warehouse_date_id, name: MongoListStatWorkers::BarnesNobleTop100::LIST_NAMES[:bn_nook_book_bestsellers]).order(:rank).each do |warehouse_list_stat|
      # price is stored in cents
      report_csv << [warehouse_list_stat.rank, warehouse_list_stat.author, warehouse_list_stat.title, warehouse_list_stat.isbn, warehouse_list_stat.bn_id, warehouse_list_stat.price.try(:/, 100.0), report_date]
      row_count += 1
    end
    report_csv.flush

    begin
      # This report should always put out 100 rows
      if EnterpriseReports.report_count_valid?(row_count, 100)
        EnterpriseReports.move_to_s3(client_name, report_csv)
        EnterpriseReports.ftp_to_client(client_name, report_csv)
        EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:bn_top_100]).deliver
        Rails.logger.tagged('enterprise') {Rails.logger.info "Report Delivered to email lists and copied to production"}
      else
        EnterpriseReports.send_report_count_error report_file_name, row_count, 100
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_file_name} Report NOT Delivered: row count off by 0.5% or more -----"}
      end
    rescue *HTTP_ERRORS => e
      # Fix: the captured exception was previously dropped from the log line,
      # making delivery failures undiagnosable.
      Rails.logger.tagged('enterprise') {Rails.logger.info "Error trying to send top 100 bn bestsellers email or upload to FTP: #{e}"}
    ensure
      report_csv.close
    end
  end
end
-
-
1
class GenerateBarnesAndNobleTop100Report
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Writes the booklr "real" B&N top-100 CSV for one day, uploads it to S3 and
  # emails it. Unlike the bestsellers variant there is no client FTP step.
  def perform(date_string = Date.current.to_s)
    report_date = date_string.to_date
    warehouse_date_id = WarehouseDate.where(date: report_date).value_of :id
    client_name = :booklr
    client_config = AmazeBot.config[:reports][:clients][client_name]
    report_file_name = "real-top-100-barnes-and-noble-#{report_date.strftime("%m%d%y")}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name
    report_csv = EnterpriseReports.open_csv(report_hash)
    report_csv << ['Rank', 'Author', 'Title', 'ISBN', 'BN ID', 'Price', 'Date']
    row_count = 0

    WarehouseListStat.where(warehouse_date_id: warehouse_date_id, name: MongoListStatWorkers::BarnesNobleTop100::LIST_NAMES[:bn_top_100_nook_books]).order(:rank).each do |warehouse_list_stat|
      # price is stored in cents
      report_csv << [warehouse_list_stat.rank, warehouse_list_stat.author, warehouse_list_stat.title, warehouse_list_stat.isbn, warehouse_list_stat.bn_id, warehouse_list_stat.price.try(:/, 100.0), report_date]
      row_count += 1
    end
    report_csv.flush

    begin
      # This report should always put out 100 rows
      if EnterpriseReports.report_count_valid?(row_count, 100)
        EnterpriseReports.move_to_s3(client_name, report_csv)
        EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:real_top_100_bn]).deliver
        Rails.logger.tagged('enterprise') {Rails.logger.info "Report Delivered to email lists and copied to production"}
      else
        EnterpriseReports.send_report_count_error report_file_name, row_count, 100
        Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_file_name} Report NOT Delivered: row count off by 0.5% or more -----"}
      end
    ensure
      # Fix: previously the handle leaked when move_to_s3/mailer raised.
      # Exceptions still propagate so Sidekiq can retry the job.
      report_csv.close
    end
  end
end
-
-
1
class AppleAllTopBooksReport
  include EnterpriseReports
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Exports every Apple book-category list for one day (all list types) and
  # emails it to the booklr :apple_top_books recipients. No FTP, no gzip.
  def perform(report_date_string = Date.current.to_s)
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: report_date).id
    # Aliased columns become the CSV header row in the COPY output.
    selected_columns = 'warehouse_categories.name AS "List Name", list_type AS "List Type", rank AS "List Rank", author AS "Author", title AS "Title", itunes_id AS "iTunes ID", trunc(price / 100.0, 2) AS "Price", warehouse_dates.date AS "List Rank Date"'
    report_sql = WarehouseListStat.select(selected_columns).joins(:warehouse_date, :warehouse_category).where{warehouse_category.category_type == 'AppleBookCategory'}.where(warehouse_date_id: warehouse_date_id).order('warehouse_categories.name, list_type DESC, rank').to_sql
    sql_copy_to_csv_and_deliver_report(report_sql, :booklr, "apple-all-books-ranks-report-#{report_date.strftime("%m%d%y")}", nil, ftp: false, emailable_report_name: :apple_top_books, gzip: false)
  end
end
-
end
-
end
-
1
module OneTimeReports
-
1
class Goodreads
  include EnterpriseReports
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # One-off export of Goodreads work/edition/star-count stats for every
  # ingested .com book version on a single date, delivered to booklr.
  # Pass deliver_email = false to generate the file without emailing it.
  def perform(report_date_string, deliver_email = true)
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: report_date).id

    goodreads_columns = %w[goodreads_work_average_rating goodreads_work_rating_count goodreads_work_review_count
                           goodreads_work_added_by_count goodreads_work_to_read_count goodreads_edition_average_rating
                           goodreads_edition_rating_count goodreads_edition_review_count goodreads_edition_added_by_count
                           goodreads_5_star_count goodreads_4_star_count goodreads_3_star_count goodreads_2_star_count goodreads_1_star_count]
    joined_columns = {warehouse_book_version: %w[title book_format asin isbn13 bn_id author_name]}
    report_sql = WarehouseStat.single_query_join_and_select(goodreads_columns, joined_columns, nil).where(warehouse_region_id: WarehouseRegion.com.id, warehouse_date_id: warehouse_date_id).order(:warehouse_book_version_id).to_sql

    # Expected row count = number of ingested .com book versions.
    sql_copy_to_csv_and_deliver_report(report_sql, :booklr, "goodreads-report-#{report_date.strftime("%m%d%y")}", WarehouseBookVersion.com.ingested.count, ftp: false, emailable_report_name: (deliver_email ? :goodreads : nil))
  end
end
-
-
1
# One-off PLL report: for a fixed set of 23 ASINs, writes one transposed CSV
# per metric (amazon_price, amazon_sales_rank, amazon_review_count,
# goodreads_work_rating_count) -- rows are ASINs, columns are dates -- then
# uploads each file via ReportUploader.
class PllHistoricAttributeReport
-
1
include EnterpriseReports
-
1
include Sidekiq::Worker
-
1
sidekiq_options queue: :reporting
-
-
1
def perform
-
asins = %w[B0058WCBOI B00G7J5NH8 B003JBI0QS B0067MSQEC B00CMLBK9U B0076LR1GW B00FO14UJM B007PLAVH4
-
B008PYM59C B007MF3NK0 B005DSA1T4 B0085AJQEI B00B0A5Y78 B00D3WHFHS B00GALGRJG B00DPN1SNW B00AOHDMFE
-
B00G75EQMA B00E5H5E3W B0051UBSLE B00C8324IS B0090U0J3Y B00DJUN2WG]
-
warehouse_book_version_ids = WarehouseBookVersion.where(asin: asins).value_of :id
-
columns = %w[amazon_price amazon_sales_rank amazon_review_count goodreads_work_rating_count]
-
# Selected column order is asin(0), date(1), then the four metric columns;
# rows come back ordered date DESC, then book version.
sql = WarehouseStat.select('warehouse_book_versions.asin, warehouse_dates.date').select(columns).joins(:warehouse_date, :warehouse_book_version).where(warehouse_book_version_id: warehouse_book_version_ids).order('warehouse_dates.date desc').order(:warehouse_book_version_id).to_sql
-
res = WarehouseStat.connection.execute(sql)
-
# One raw file handle per metric; CSV text is hand-assembled with << below.
# NOTE(review): these handles are only closed at the bottom -- an exception
# mid-loop leaks them.
csvs = columns.each_with_object({}) {|column, hash| hash[column] = File.open("./tmp/pll_historical_#{column}_report_#{Date.current.to_s}.csv", 'wb')}
-
current_asin = nil
-
# Header row: blank corner cell, then every distinct date (column 1 of the
# result set, transposed), comma-joined.
csvs.values.each {|csv| csv << ",#{res.values.transpose[1].uniq.join(',')}\n"}
-
res.each do |row|
-
# Start a new CSV line whenever the ASIN changes (rows are grouped by
# book version within each date ordering).
if row['asin'] != current_asin
-
if current_asin != nil
-
csvs.values.each {|csv| csv << "\n"}
-
end
-
csvs.values.each {|csv| csv << row['asin']}
-
current_asin = row['asin']
-
end
-
# Append this row's value for each metric to that metric's file;
# prices are stored in cents, so convert to dollars.
columns.each do |column|
-
value = column.include?('price') ? row[column].try(:to_i).try(:/, 100.0) : row[column]
-
csvs[column] << ",#{value}"
-
end
-
end
-
-
# Flush, close, and upload each per-metric file to the PLL report bucket.
csvs.values.each do |csv|
-
csv.flush
-
csv.close
-
-
uploader = ReportUploader.new
-
uploader.client_name = :pll
-
uploader.store! File.new(csv.path)
-
end
-
end
-
end
-
-
1
class FishRichardsonDimensionReport
  include EnterpriseReports
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # One-off report of physical dimensions (Length x Width x Depth, inches) for
  # ingested .com paperback/hardcover titles, plus three "similar item"
  # category trees. Book versions are processed in batches of 1000 to bound
  # the per-query IN list.
  def perform(report_date_string)
    client_name = :booklr
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: report_date).id
    warehouse_region_id = WarehouseRegion.com.id

    # NOTE(review): file name uses Date.current rather than report_date -- confirm intentional.
    report_file_name = "fish-richardson-title-dimensions-report-#{Date.current.strftime("%m%d%y")}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name
    report_csv = EnterpriseReports.open_csv(report_hash)
    report_csv << ['ISBN', 'Title', 'Author', 'Format', 'Published Date', 'Length', 'Width', 'Depth', 'Similar Item Category 1', 'Similar Item Category 2', 'Similar Item Category 3']

    warehouse_book_version_ids = WarehouseBookVersion.ingested.com.where{physical_details != nil}.where(book_format: ["Paperback", "Hardcover"]).where("physical_details LIKE '%inches%'").order(:id).value_of(:id)

    (warehouse_book_version_ids.count / 1000 + 1).times do |count|
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- Generating F&R report for #{(1000 * count)..(1000 * (count + 1) - 1)} out of #{warehouse_book_version_ids.count} book versions -----"}

      sql = WarehouseStat.single_query_join_and_select('amazon_similar_item_category_tree_1, amazon_similar_item_category_tree_2, amazon_similar_item_category_tree_3',
                                                       {warehouse_book_version: %w[isbn13 title author_name book_format pub_date physical_details]},
                                                       nil).where{warehouse_stats.warehouse_book_version_id.in warehouse_book_version_ids[(1000 * count)..(1000 * (count + 1) - 1)]}.where(warehouse_date_id: warehouse_date_id, warehouse_region_id: warehouse_region_id).to_sql

      # NOTE(review): chunk yields [key, array-of-rows]; calling .values on that
      # array looks suspect -- confirm the grouped rows respond to #values here.
      ActiveRecord::Base.connection.execute(sql).chunk {|el| el['warehouse_book_version_id']}.each do |_, warehouse_stat_result|
        # Last 6 selected values are the joined book-version columns; the final
        # one is physical_details, e.g. "8 x 5.2 x 1.1 inches".
        book_version_values = warehouse_stat_result.values.last(6)
        dimension_parts = book_version_values.last.split(" x ")
        length = dimension_parts.first
        width = dimension_parts.second
        depth = dimension_parts.third.present? ? dimension_parts.third.split(" inches").first : "N/A"
        row = book_version_values.first(5) + [length] + [width] + [depth] + warehouse_stat_result.values.first(3)
        report_csv << row
      end
    end

    report_csv.flush
    begin
      EnterpriseReports.move_to_s3(client_name, report_csv)
    ensure
      # Fix: the handle was previously never closed.
      report_csv.close
    end
  end
end
-
end
-
1
module EnterpriseReports
-
1
module StatisticalReports
-
1
EXCLUDED_PUBLISHERS = ['12-Gauge Comics', '47North', '519 editores', 'ABC News', 'Abingdon Press', 'Abingdon Press Fiction', 'Accent Press', 'Accent Press Ltd', 'Ace', 'Adams Media', 'Aerie', 'Akashic Books', 'Akashic Noir Series', 'Akashic Urban Surreal Series', 'AKDigital', 'Alba Editorial', 'Albert Salvadó', 'Alfaguara', 'Algonquin Books', 'Allan Classics', 'Allison & Busby', 'Allison & Busby', 'Alpha', "Amazon Children's Publishing / Plympton", 'AmazonCrossing', 'AmazonEncore', 'Anchor', 'Anchor; 1st edition', 'Angry Robot', 'Aspect', 'Atida Press, The Olive Group', 'Atlantic Monthly Press', 'Atria', 'Atria Books', 'Atria Books; 1 Reprint edition', 'Atria Books; First edition', 'Atria Books; Reprint edition', 'Atria/Emily Bestler Books', 'Avon', 'Avon Impulse', 'Avon Inspire', 'B de Books', 'Back Bay Books', 'Baile del Sol', 'Ballantine', 'Ballantine Books', 'Ballantine Books; 1 edition', 'Ballantine Books; 1st Ballantine Books Domestic Ed edition', 'Ballantine Books; 1st edition', 'Ballantine Books; Ballantine Books ed edition', 'Ballantine Books; First edition', 'Ballantine Books; Original edition', 'Ballantine Books; Reissue edition', 'Ballantine Books; Reprint edition', 'Ballantine Group', 'Bantam', 'Bantam Books', 'Bantam Classics', 'Bantam Dell Pub Group, Westminster, Maryland, U.S.A.', 'Bantam Doubleday Dell', 'Bantam Fiction', 'Bantam; 1 edition', 'Bantam; 1st edition', 'Bantam; Original edition', 'Bantam; Reissue edition', 'Bantam; Reprint edition', 'Bantam; Revised edition', 'Bastei Luebbe', 'BBC Digital', 'Berkley', 'Berkley; 1 edition', 'Berkley; 1st edition', 'Berkley; Original edition', 'Berkley; Reprint edition', 'Bloomsbury', 'Bloomsbury Reader', 'Bloomsbury USA', 'BLOOMSBURY USA/WALKER', 'Blue Hen', 'Blue Rider Press', 'Broadway', 'Broadway; Reprint edition', 'Byliner Inc.', 'CAB,TROYA', 'Cash Money Content; Original edition', 'Center Street', 'ChiZine', 'CISNE', 'Cornerstone Digital', 'Corvallis Press', 'Crimeline', 'Crimeline; Reissue 
edition', 'Crimeline; Reprint edition', 'Crimeline; Revised edition', 'Crown', 'Crown; 1 edition', 'Dark Horse Comics', 'Daw', 'DEBOLS!LLO', 'Del Rey', 'Del Rey; 1 edition', 'Del Rey; Original edition', 'Delacorte Pr', 'Delacorte Press', 'Delacorte Press; 1 edition', 'Delacorte Press; 1st edition', 'Delacorte Press; First edition', 'Delacorte Press; Original edition', 'Delacorte Press; Tra edition', 'Delacorte Pubns Inc', 'Dell', 'Dell Publishing Company', 'Dell; 1 edition', 'Dell; 1st edition', 'Dell; Dell Mass Market ed edition', 'Dell; Original edition', 'Dell; Reissue edition', 'Dell; Reprint edition', 'Delta', "Delta; Oprah's Book Club edition", 'Delta; Original edition', 'Delta; Reprint edition', 'Diversion Books', 'Doubleday', 'Doubleday Canada', 'Doubleday; 1st edition', 'Dover Publications', 'DUTTON ADULT', 'Dutton Adult', 'Dutton Guilt Edged Mysteries', 'Ecco', 'Ediciones Siruela', 'Editorial Alrevés', 'Editorial Amarante', 'Editorial Autores de Argentina', 'Editorial Kattigara', 'Editorial La Tuerca', 'Editorial Medí', 'Editorial Medí', 'Editorial Sur', 'eLibros', 'Emblem Editions', 'Europa', 'Faber and Faber Crime', 'FaithWords', 'FaithWords; 1 edition', 'fallen leaves press (TM) and ignacio hills press (TM) IgnacioHillsPress.com', 'fallen leaves press (TM), e-Pulp Adventures (TM) and ignacio hills press (TM) IgnacioHillsPress.com', 'fallen leaves press (TM), ignacio hills press (TM), E-Pulp Adventures (TM)', 'Farrar, Straus and Giroux', 'Farrar, Straus and Giroux; 1 edition', 'Fawcett', 'Fawcett; Reprint edition', 'Fawcett; Revised edition', 'Forever', 'Forever Yours', 'Forge Books', 'Forge Books; 1 edition', 'Francesco Libri', 'Free Press', 'FSG Originals', 'Gallery Books', 'Grand Central Publishing', 'Grand Central Publishing; 1 edition', 'Grand Central Publishing; 1st edition', 'Grand Central Publishing; First edition', 'Grand Central Publishing; Reprint edition', 'Graywolf Press', 'GRIJALBO', 'Grove Press', 'Grove Press, Black Cat', 
'Grove/Atlantic', 'Grupo Nelson', 'Hachette Digital', 'Hackett Publishing Co.', 'Halcyon Press Ltd.', 'Hard Case Crime', 'Harlequin', 'Harlequin Anthology', 'Harlequin Historical', 'Harlequin Ibérica, S.A.', 'Harlequin Intrigue', 'Harlequin Medical Romance', 'Harlequin MIRA', 'Harlequin Special Releases', 'Harlequin Treasury-Harlequin Intrigue 90s', 'Harlequin Treasury-Silhouette Special Edition 90s', 'Harper', 'Harper Design', 'Harper Paperbacks', 'Harper Perennial', 'Harper Perennial; Original edition', 'Harper Perennial; Reprint edition', 'Harper Voyager', 'Harper; Original edition', 'Harper; Reprint edition', 'HarperCollins', 'HarperCollins Canada', 'HarperCollins e-books', 'HarperCollins e-books; 1 edition', 'HarperCollins e-books; 1 Reissue edition', 'HarperCollins e-books; 1 Reprint edition', 'HarperCollins e-books; 1st edition', 'HarperCollins e-books; 256 edition', 'HarperCollins e-books; Mti Rep edition', 'HarperCollins e-books; Org Mti edition', 'HarperCollins e-books; Original edition', 'HarperCollins e-books; Reissue edition', 'HarperCollins e-books; Repack edition', 'HarperCollins e-books; Reprint edition', 'HarperCollins ebooks', 'HarperPerennial Classics', 'HarperPress', 'Harvard University Press', 'Headline', 'Headline Books, Inc.', 'Henry Holt and Co.', 'Hogarth', 'Holt Paperbacks', 'Houghton Mifflin Harcourt', 'Howard Books', 'Hyperion', 'Hyperion e-books', 'Ian Fleming Publications', 'ignacio hills press (TM) IgnacioHillsPress.com', 'ignacio hills press (TM) IgnacioHillsPress.com and e-Pulp Adventures (TM)', 'Il Gatto e la Luna', 'Il leone verde Edizioni', 'Image', 'InterMix', 'Island Books', 'Island Fiction', 'Ivy Books', 'Ivy Books; 1st Ballantine Books Ed edition', 'Ivy Books; 1st edition', 'Ivy Books; Reprint edition', 'Jordán Dorado', 'Jordi Díez', 'Jove', 'Kensington', 'Kensington Books', 'Knopf', 'Knopf Canada', 'Knopf Group E-Books', 'La Esfera de los Libros', 'La Factoría de Ideas', 'La Factoría de Ideas', 'La Flor del Itapebí', 'Lübbe 
Digital', 'Laboratorio Editorial TusRelatos SL', 'Little, Brown and Company', 'Little, Brown and Company; 1 edition', 'Little, Brown and Company; 1st edition', 'Loveswept', 'LUMEN', 'Macmillan', 'Macmillan Australia', 'Macmillan New Writing', 'Macmillan UK', 'McClelland & Stewart', 'McClelland & Stewart', 'Metropolitan Books', 'Midnight Ink', 'Minotaur Books', 'Minotaur Books; 1 edition', 'Minotaur Books; Reprint edition', 'Mira', 'Mira; 1 edition', 'Mira; 1 Original edition', 'Mira; Original edition', 'Modern Library', 'Modern Library; Modern Library edition', 'Modern Library; Modern Library Pbk. Ed edition', 'MONDADORI', 'Montlake Romance', 'MTV Books', 'Mulholland Books', 'Mulholland Books; 1 edition', 'Mulholland Books; Reprint edition', 'Multnomah Books', 'Mysterious Press', 'MysteriousPress.com/Open Road', 'NAL', 'Nan A. Talese', 'Newmarket Press', 'North Point Press', 'NYRB Classics', 'Oceanview Publishing', 'One World/Ballantine', 'One World/Ballantine; 1st edition', 'One World/Ballantine; Reprint edition', 'One World/Strivers Row', 'Onyx', 'Open Road', 'Open Road E-riginal', 'Open Road Iconic Ebooks', 'Orb Books', 'Orbit', 'Ordóñez Díaz, Olegario / Ediciones Cátedra Pedagógica', 'Originally published by Avon Books in 1996', 'Orion', 'Overlook', 'Oxford University Press', 'Oxford University Press, UK', 'Oxford University Press, USA', 'Palgrave Macmillan', 'Pan', 'Pan Books', 'Pantheon', 'Penguin', 'Penguin Books', 'Penguin Classic', 'Penguin Non-Classics', 'Perigee', 'Phoenix', 'Picador', 'Picador; 1 edition', 'Pinnacle Books', 'Pintail', 'PLAZA & JANES', 'PLAZA & JANES', 'PLAZA Y JANES', 'Plume', 'Pocket Books', 'POCKET BOOKS (SIMO)', 'Pocket Books/Star Trek', 'Pocket Star', 'Pocket Star; Original edition', 'Poisoned Pen Press', 'Presidio Press', 'Princeton University Press', 'Putnam Adult', 'Random House', 'Random House Australia', 'Random House Publishing Group', 'Random House Trade Paperbacks', 'Random House Trade Paperbacks; 1 edition', 'Random House 
Trade Paperbacks; Original edition', 'Random House Trade Paperbacks; Reprint edition', 'Random House, Inc.', 'Random House; 1 edition', 'Random House; 1st edition', 'Random House; 1st Unabridged edition', 'Random House; Book Club edition', 'Rayo', 'Reagan Arthur / Back Bay Books', 'Reagan Arthur Books', 'Reagan Arthur Books; 1 edition', 'Reagan Arthur Books; Special edition', 'Revell', 'Riverhead', 'Roc', 'ROSA VENTS', 'RosettaBooks', 'Rough Guides', 'Sarah Crichton Books', 'Scribner', 'Severn House Digital', 'Severn House Digital; Reprint edition', 'Severn Press', 'Severn Select', 'Signet', 'Signet Classics', 'Silhouette Bombshell', 'Silhouette Desire', 'Silhouette Intimate Moments', 'Silhouette Nocturne', 'Silhouette Romantic Suspense', 'Silhouette Special Releases', 'Simon & Schuster', 'Simon & Schuster; 1 edition', 'Simon & Schuster; 1 Reprint edition', 'Simon & Schuster; 1st edition', 'Simon & Schuster; 1st Simon & Schuster Hardcover Ed edition', 'Simon & Schuster; 1st Simon & Schuster Pbk. Ed edition', 'Simon & Schuster; Original edition', 'Simon & Schuster; Reprint edition', 'Simon & Schuster', 'Simon & Schuster Audio', 'Simon & Schuster UK', 'Simon Pulse', 'Soft Skull Press', 'Soho Constable', 'Soho Crime', 'Soho Press', 'Sourcebooks Casablanca', 'Sourcebooks Landmark', 'Spectra', 'Spectra; Original edition', 'Spectra; Reissue edition', 'Spectra; Reprint edition', 'Spiegel & Grau', 'Spiegel & Grau; 1 edition', 'Spiegel & Grau', "St. Martin's Dead Letter", "St. Martin's Griffin", "St. Martin's Griffin; 1 edition", "St. Martin's Griffin; Original edition", "St. Martin's Paperbacks", "St. Martin's Paperbacks; 1 edition", "St. Martin's Press", "St. Martin's Press; 1 edition", "St. Martin's Press; First Edition edition", "St. 
Martin's Press; Reprint edition", 'Strebor Books', 'SUDAMERICANA', 'Suma de letras', 'Tarcher', 'The Dial Press', 'The Dial Press; 1 edition', 'The Dial Press; 1st edition', 'The Penguin Press', 'Thomas & Mercer', 'Thomas & Mercer', 'Thomas & Mercer / Plympton', 'Thomas Dunne Books', 'Thomas Nelson', 'Thomas Nelson Publishers', 'Threshold Editions', 'TIME WARNER PAPERBAC', 'Titan Books', 'Tor Books', 'Tor Classics', 'Tor Fantasy', 'Touchstone', 'Touchstone; Original edition', 'Touchwood Editions', 'Transworld Digital', 'Twelve', 'Tyndale House Publishers, Inc.', 'Tyrus Books', 'University of Chicago Press', 'University of New Mexico Press', 'University of Wisconsin Press', 'University Press of Mississippi', 'University Press of New England', 'Vanguard Press', 'Vida', 'VIKING ADULT', 'Villard', 'Villard; 1st edition', 'Vintage', 'Vintage Canada', 'Vintage Digital', 'Vintage; 1 edition', 'Vintage; Reprint edition', 'Vision', 'W. W. Norton & Company', 'W. W. Norton & Company', 'Walk Worthy Press', 'Walker Books', 'Walker Childrens', 'Washington Square Press', 'WaterBrook Press', 'William Morrow', 'William Morrow Paperbacks', 'William Morrow Paperbacks; 1 edition', 'William Morrow Paperbacks; Masterpiece ed edition', 'William Morrow Paperbacks; Open market ed edition', 'William Morrow Paperbacks; Original edition', 'William Morrow Paperbacks; Reissue edition', 'William Morrow Paperbacks; Reprint edition', 'William Morrow; 1 edition', 'William Morrow; Reprint edition', 'Windblown Media', 'Zondervan', 'David C. 
Cook', 'Bethany House', 'Bethany House Publishers', 'Bello', 'Chronicle Books', 'Melville International Crime', 'Steeple Hill Love Inspired Suspense', 'Steeple Hill Single Title', 'Bitter Lemon Press', 'Bold Strokes Books', 'd', 'Pegasus Books', 'Princeton Halls Press', 'Library Tales Publishing', 'Medallion Press', 'Mantle', 'Speck Press', 'Silhouette Athena Force', 'Acacia Publishing, Inc.', 'AAA Reality Games', 'Addison Moore', 'Addison Moore Publishing', 'Aladdin', 'Alfaguara Juvenil', 'Alloy Entertainment', 'AMACOM', "Amazon Children's Publishing", 'Amazon Publishing', 'Amazon.com', 'American Cancer Society', 'American Girl', 'American Psychological Association', 'AMG Publishers', 'Amulet Books', 'Amulet Books; 1st edition', 'Annick Press', 'Archie Comics', 'Arthur A. Levine Books', 'Astraea Press', 'Atheneum', 'Atheneum Books for Young Readers', 'Atlantic Publishing Group Inc.', 'Bancroft Press', 'Barbour Books', 'Beaufort Books', 'Bell Bridge Books', 'Berlinica Publishing LLC', 'Bluewood Books', 'Book Peddlers, The', 'Darby Creek', 'Michael Wiese Productions', "NYR Children's Collection", 'Platypus Press', 'Prufrock Press', 'Red Iris Books', 'Running Press Kids', 'Sandcastle Publishing LLC', 'WiDo Publishing', "Writer's Digest Books", 'Writers Digest Books', "Barron's Educational Series", 'BenBella Books', 'Bloomsbury Childrens', 'Bloomsbury Publishing', 'Bloomsbury USA Childrens', 'Body and Soul Publishing', 'Candlewick', 'Candlewish DRM-Free', 'Canterbury House Publishing', 'Carolrhoda Lab TM', 'Carpe Luna Publishing', 'Carpe Luna, Ltd.', 'Chicago Review Pr', 'Chicago Review Press', 'Chicken Soup for the Soul', 'Coliloquy, LLC', 'Cooper Square Publishing Llc', 'DC Comics', 'Disney', 'Disney Hyperion', 'Disney Hyperion; 1 edition', 'Echelon Press', 'ECW Press', 'Edic', 'Ediciones Selectas Diamante', 'Educational Game Books/Anti-Aging Press, Inc.', 'EgmontUSA', 'Entangled Teen', 'Evolved Publishing', 'Fairview Press', 'Flux', 'Free Spirirt Publishing', 'Free 
Spirit Publishing', 'Gibbering Gnome Press, A Division of Ingenious Inventions Run Amok, Ink', 'Gibbs Smith', 'Gibbs-Smith', 'Grace Publishing', 'Graphia', 'Hachette Digital ', 'Hachette India', 'Hampton Roads Pub Co', "Harcourt Children's Books", 'Harlequin Teen', 'HarperCollins; 1 edition', 'HarperCollins; 1st Avon Ed edition', 'HarperCollins; Reprint edition', "HarperCollinsChildren'sBooks", 'HarperTeen', 'HarperTeen; 1 edition', 'HarperTeen; 1st Avon Ed edition', 'Harvest House Publishers', 'Haunted Computer Books', 'Hay House', 'HCI', 'HCI Teens', 'Health Communications', 'HJ Kramer/New World Library', 'HMH', 'HMH Books', "Hodder Children's Books", 'Houghton Mifflin', 'Houghton Mifflin Books for Children', 'Houghton Mifflin Harcourt; 1 edition', 'Instant Help', 'Jessica Kingsley Publishers', 'Jossey-Bass', 'Kensington Publishing Corp', 'Kimani TRU', 'Kirkdale Press', 'Kregel Publications', 'Lift Every Voice', 'Little, Brown Books for Young Readers', 'Living Ink Books, an imprint of AMG Publishers', 'Llewellyn Publications', "Macmillan Children's Books", 'Marcher Lord Press', 'Marcus Institute of Commercial Modeling', 'Margaret K. McElderry Books', 'Marvel', 'Middlebury House Publishing', 'Moody Publishers', 'Musa Publishing', "National Geographic Children's Books", 'Navpress', 'NavPress Publishing Group', 'Open Road Young Readers', 'Orchard Books', "Orion Children's", 'Pelican Publishing Company', 'Point', 'Point; 1 edition', 'Prometheus Books', 'Puffin', 'Pyr', 'Quirk Books', 'Ragz Books', 'Random House Books for Young Readers', 'Raven Publishing, Inc. 
of Montana', 'Reprints', 'RHCB Digital', 'RHCP Digital', 'Roca Juvenil', 'Scholastic Inc.', 'Scholastic Paperbacks', 'Scholastic Press', 'Shadow Mountain', 'Simon & Schuster Books for Young Readers', 'Simon & Schuster Books for Young Readers; 1 edition', 'Simon & Schuster Books for Young Readers; 1 Reprint edition', 'Simon & Schuster Books for Young Readers; 1st Simon Pulse Ed edition', 'Simon & Schuster Books for Young Readers; Original edition', 'Simon & Schuster Books for Young Readers; Reprint edition', 'Simon & Schuster/Paula Wiseman Books; 1 edition', 'Simon & Schuster Books for Young Readers', 'Simon Pulse/Mercury Ink', 'Simon Pulse/Mercury Ink; 1 edition', 'Simon Spotlight', 'Sky Pony Press', 'Sourcebooks Fire', 'Sourcebooks Jabberwocky', 'Spencer Hill Press', 'StoneHouse Ink', 'Tanglewood Press', 'Taylor & Francis', 'The Chicken House', 'Tor Teen', 'Tuttle Publishing', 'Tyndale House Publishers', 'Tyndale Kids', 'University of Nebraska Press', 'Vanguard Management', 'Vanguard Management, Inc.', 'Wizards of the Coast', 'Workman Publishing Company', 'Yale University Press', 'Zest Books', 'Collins Educational', 'Delacorte Books for Young Readers', 'Laurel Leaf', 'Zonderkidz', 'Knopf Books for Young Readers', 'Soho Teen', 'Simon & Schuster/Paula Wiseman Books', 'Wiley', 'Tor', 'Enslow Publishers, Inc.', 'Facts on File', 'Facts on File (J)', 'Wiley-Blackwell', 'Alfaguara Infantil', 'Allen & Unwin', 'Almadraba Editorial', 'Almadraba Infantil y Juvenil', 'Ambush Books', 'David Fickling Books', 'Schwartz & Wade', 'Razorbill', 'Signet Classic', 'Greenwillow Books', 'Collins', 'HarperFestival', 'HarperCollins; Revised edition', 'The Friday Project', 'HarperTeen; Reprint edition', 'Yen Press', 'MONTENA', 'Aladdin/Beyond Words', 'Simon & Schuster Books for Young Readers; 1st edition', 'Simon Pulse; Reprint edition', 'Dundurn', 'HarperCollins; 1 Reprint edition', 'Harper Element', 'Scholastic Non Fiction', 'Scholastic Non-Fiction', 'Allen & Unwin', 'Springer Publishing 
Company', 'Rutgers University Press', 'Routledge', 'Scarecrow Press', 'Prestwick House, Inc.', 'Pook Press', 'Pelican Publishing', 'Academic Group Publishing - BRAII', 'Andrews McMeel Publishing LLC', 'AudioGO', 'Bailiwick Press', 'Baker Book House Company', 'Basic Books', "BBC Children's Books", 'Book View Cafe', 'Carolrhoda Books', 'Cedar Fort, Inc.', 'Chelsea House', 'Clarion Books', 'Conari Press', 'Disney Press', 'Edições Paulinas -São Paulo-Brazil', 'Ediciones B Mexico S.A. de C.V.', 'Egmont', 'Faber and Faber Plays', "Frances Lincoln Children's Books", 'Galaxy Press', 'Gospel Light', 'Greenwood', 'Groundwood Books', 'Harmony Ink Press', 'Hardie Grant Egmont', 'HarperCollins; 1st Avon ed edition', 'Jessica Kingsley', 'Jewish Lights Publishing', 'John Wiley & Sons, Inc.', 'Kaplan Test Prep', 'La factorÃa de ideas', 'Lechner Syndications', 'Libraries Unlimited', 'M P Publishing Limited', 'McFarland', 'McGraw-Hill', 'Mirrorstone', 'Minnesota Historical Society Press', 'New Horizon Press', 'Nickelodeon Publishing', 'Nomad Press', 'Orca Book Publishers', 'Patria Press', 'Push', 'Regal', 'Roberts Rinehart', 'Roca Editorial', 'Running Press', 'Sandpiper', 'Simon & Schuster Books For Young Readers', 'Smart Pop', 'Sourcebooks', 'St. Paul Press', 'T & F Books UK', 'The Jewish Publication Society', 'Torch Legacy Publications', 'Ulysses Press', 'Univ Of Minnesota Press', 'University of California Press', 'Walter Foster', 'William Gladden Foundation Press', 'Zion Christian Publishers', 'Alfred A. 
Knopf', 'Amistad', 'Amistad; 1 edition', 'Authonomy', 'Avery', 'Back Stage Books', 'Baker Books', 'Balzer + Bray', 'Balzer + Bray; 1 edition', 'Bantam Books for Young Readers', 'Beach Lane Books', 'Bindu Books', 'Bloomberg Press', 'Bluefire', 'Cambridge University Press', 'Candle Books', 'Candlewick DRM-Free', 'Candlewick; Reprint edition', 'Celestial Arts', 'Chamberton Publishing', 'Chelsea House Pub (L)', 'Chelsea House Publications', 'Chronicle Books LLC', 'Cinco Puntos Press', 'Collins Voyager', 'Compass Press', 'Crossroad Press', 'Crown Archetype', 'Crushing Hearts and Black Butterfly Publishing', 'Curiosity Quills Press', 'Curtis Brown Digital', 'Dafina', 'Decadent Publishing Company', 'Dell Books for Young Readers', 'Desert Breeze Publishing, Inc', 'Devine Destinies', 'DIAL', 'DK Publishing', 'Doubleday Books for Young Readers', "DUTTON CHILDREN'S", 'e-penguin', 'Ebury Digital', 'Echelon Press LLC', 'Eerdmans Books for Young Readers', 'Eloquent Books', 'Ember', 'Entangled Publishing', 'ePenguin', 'Etopia Press', 'Evernight Publishing', "Faber and Faber Children's Books", 'Family Audio Library', 'Family Psychological Press', 'Farrar, Straus and Giroux (BYR)', 'Farrar, Strauss & Giroux', 'Featherweight Publishing', 'Feiwel & Friends', 'Feiwel & Friends', 'Firebird', 'First Second', 'Flash Point', 'Fourth Estate', 'Fremantle Press', 'Gallery', 'Gibbs Smith, Publisher', 'Golden Books', 'Gotham Books', 'Greenwillow Books; 1 edition', 'Greenwillow Books; 1st edition', 'Grosset & Dunlap', 'Grosset & Dunlap', 'Gypsy Shadow Publishing', 'Hampton Roads Publishing', 'Harlequin Blaze', 'Harlequin HQN', 'Harlequin Nocturne', 'HarperCollins; 1st edition', 'HarperOne', 'HarperTeen; Revised edition', 'HarperTrue', 'Henry Holt and Co. 
(BYR)', 'Holiday House', 'Houghton Mifflin Books for Children; Reprint edition', 'Houghton Mifflin Harcourt; Reprint edition', 'HQN Books', 'Image Cascade Publishing', 'ImaJinn Books, Inc', 'Inkspell Publishing', 'Itoh Press', 'K-Teen/Dafina', 'Katherine Tegen Books', 'Katherine Tegen Books; 1 edition', 'Kelpies', 'Lands Atlantic Publishing', 'Leap Books', 'Limitless Publishing LLC', "Lion Children's", 'LITTLE BROWN BKS YOUNG READERS', 'Little, Brown', 'Little, Brown Books for Young Readers; 1 edition', 'Little, Brown Books for Young Readers; 1st edition', 'Lovestruck Literary', 'Lyrical Press, Inc.', 'Martin Sisters Publishing LLC', 'Merit Press', 'Merriam-Webster, Inc.', 'Modern Library; Tra edition', 'Montena', 'MP Publishing Limited', 'MTV Books; Original edition', 'Mundania Press LLC', 'MuseItUp Publishing', 'Myrddin Publishing Group', 'Neeland Media LLC', 'New Canadian Library', 'Noble Romance Publishing, LLC', 'Noble Young Adult - Not Just Romance!', 'NPC Books', 'NUBE DE TINTA', 'OakTara', 'One World/Ballantine; 1 edition', 'Open Road Media', 'Open Road Media Young Readers', 'Outskirts Press', 'Outskirts Press, Inc.', 'Papercutz', 'Penguin Young Readers', 'Persea', 'PHILOMEL', 'Pocket Star; Reprint edition', 'Poppy', 'Potter Craft', 'Prizm Books', 'PSS Juvenile', 'Putnam Juvenile', 'Queerteen Press', 'Random House BFYR', 'Rhemalda Publishing', 'Ripley Publishing', 'Roaring Brook Press', 'Samhain Publishing, Ltd.', 'Scholastic Fiction', 'Scholastic Press; 1 edition', 'Schwartz & Wade', 'Screech Owls', 'Secret Cravings Publishing', 'Simon & Schuster Books for Young Readers; New title edition', 'Simon & Schuster Books for Young Readers; Reissue edition', "Simon & Schuster Children's Publishing", 'Simon & Schuster/Paula Wiseman Books', 'Simon Pulse; 1 edition', 'Simon Pulse; Original edition', 'Simon Pulse/Beyond Words', 'Siren Publishing', 'Skylark', 'Sleeping Bear Press', 'Solstice Publishing', 'Speak', 'Square Fish', 'Starscape', 'Storey Publishing, LLC', 
'Ten Speed Press', "The O'Brien Press", 'Three Rivers Press', 'Top Shelf Productions', 'Townsend Press', 'Trafford Publishing', 'Tricycle Press', 'Tundra Books', 'Turquoise Morning Press', 'University Of Chicago Press', 'University of Minnesota Press', 'University of Nevada Press', 'University of Pennsylvania Press', 'University of Queensland Press', "Viking Children's", 'Viking Juvenile', 'Walden Pond Press', 'Watson-Guptill', 'Weinstein Books', 'Wendy Lamb Books', 'Wheatmark', 'Whiskey Creek Press LLC', 'Wild Child Publishing', 'Wm. B. Eerdmans Publishing Company', 'World Castle Publishing', 'Yearling', 'Young Picador', 'Zebra', 'Zebra Books', 'Zeta Comics', 'Zondervan/Youth Specialties', 'Faber and Faber Fiction', 'Mercury Ink', 'Hodder', 'Penguin Classics', 'PUTNAM', 'Gallery Books/G-Unit', 'Broadway Books', 'Viking Adult', 'Redhook', 'Crown Forum', 'Dial Press Trade Paperback', 'Dreamspinner Press', 'DAW', 'Faber and Faber Non Fiction', 'Faber and Faber Poetry', 'Hodder & Stoughton', 'LucasBooks', 'Simon & Schuster; Rep Una edition', 'Putnam', "G.P. Putnam's Sons", 'Review', 'HarperCollins e-books; Ecco edition', 'Winepress Publishing', 'HarperCollins Entertainment', 'Blue Door', 'HarperCollins; Mti edition', 'Faber Finds', 'Voyager'].freeze
-
1
EXCLUDED_SOLD_BY = ['Hachette Book Group','HarperCollins Publishers','HarperCollins Publishing','Macmillan','Penguin Publishing','Random House Digital, Inc.','Random House Mondadori','Simon and Schuster Digital Sales Inc'].freeze
-
-
1
class AsinOnlyIngestedTitlesReport
-
1
include Sidekiq::Worker
-
1
sidekiq_options queue: :reporting
-
-
1
def perform
-
1
client_name = :booklr
-
1
report_hash = EnterpriseReports.generate_report_hash("asin-only-report-#{Date.current.strftime("%m%d%y")}", client_name)
-
1
csv = EnterpriseReports.open_csv(report_hash)
-
-
1
csv << ['ASIN', 'TLD', 'Title', 'Author', 'Publisher', 'Format', 'Published Date', 'Page Count', 'Amazon Link', 'Matched ISBN13', 'Matched BN ID', 'No Match Found']
-
-
1
warehousebook_version_ids = WarehouseBookVersion.where("isbn13 is null and asin is not null and bn_id is null").ingested.order(:source).value_of :id
-
-
1
(warehousebook_version_ids.count / 1000 + 1).times do |x|
-
1
WarehouseBookVersion.select([:id, :tld, :asin, :title, :author_name, :publisher, :book_format, :pub_date, :pages]).where(id: warehousebook_version_ids[(1000 * x)..(1000 * (x + 1) - 1)], status: "ingested").order(:source).each do |warehouse_book_version|
-
1
csv << [warehouse_book_version.asin, warehouse_book_version.tld, warehouse_book_version.title, warehouse_book_version.author_name, warehouse_book_version.publisher, warehouse_book_version.book_format, warehouse_book_version.pub_date, warehouse_book_version.pages, warehouse_book_version.amazon_url]
-
end
-
end
-
-
1
csv.flush
-
-
1
EnterpriseReports.move_to_s3(client_name, csv)
-
1
csv.close
-
end
-
end
-
-
1
class Identification
-
1
include Sidekiq::Worker
-
1
sidekiq_options queue: :reporting
-
-
1
def perform(report_date_string, amazon_average_rating, amazon_review_count, days_of_data_min, min_page_count, add_extra_columns, report_name, categories)
-
10
Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_name.to_s.camelcase} Identification Report -----"}
-
-
5
report_date = report_date_string.to_date
-
5
warehouse_date_id = WarehouseDate.find_by(date: report_date).id
-
5
warehouse_region_id = WarehouseRegion.com.id
-
5
creation_date_minimum = (Date.current - days_of_data_min.days).in_time_zone.to_s
-
5
starting_warehouse_date_id = WarehouseDate.find_by(date: report_date - 89.days).id
-
5
warehouse_dates = WarehouseDate.where(id: starting_warehouse_date_id..warehouse_date_id)
-
5
warehouse_date_ids = WarehouseDate.where(id: starting_warehouse_date_id..warehouse_date_id).value_of(:id)
-
5
warehouse_categories = WarehouseCategory.amazon
-
5
client_name = :booklr
-
5
client_config = AmazeBot.config[:reports][:clients][client_name]
-
-
10
warehouse_book_versions = WarehouseBookVersion.where{(warehouse_stats.warehouse_date_id == warehouse_date_id) & (warehouse_stats.warehouse_region_id == warehouse_region_id)}.where("warehouse_stats.amazon_average_rating > #{amazon_average_rating} and warehouse_stats.amazon_review_count > #{amazon_review_count}").where('warehouse_book_versions.created_at < ?', creation_date_minimum).where{
-
20
(warehouse_stats.amazon_similar_item_category_tree_1.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_2.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_3.like_any my{categories}) |
-
20
(warehouse_stats.amazon_similar_item_category_tree_4.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_5.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_6.like_any my{categories}) |
-
20
(warehouse_stats.amazon_similar_item_category_tree_7.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_8.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_9.like_any my{categories}) |
-
20
(warehouse_stats.amazon_similar_item_category_tree_10.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_11.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_12.like_any my{categories}) |
-
25
(warehouse_stats.amazon_similar_item_category_tree_13.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_14.like_any my{categories}) | (warehouse_stats.amazon_similar_item_category_tree_15.like_any my{categories})}.joins(:warehouse_stats).to_a
-
-
# Exclude publishers and get uniq list
-
5
warehouse_book_versions = warehouse_book_versions.reject {|warehouse_book_version| EnterpriseReports::StatisticalReports::EXCLUDED_PUBLISHERS.include? warehouse_book_version.publisher}
-
5
warehouse_book_versions = warehouse_book_versions.reject {|warehouse_book_version| EnterpriseReports::StatisticalReports::EXCLUDED_SOLD_BY.include? warehouse_book_version.sold_by}
-
5
warehouse_book_versions = warehouse_book_versions.reject {|warehouse_book_version| warehouse_book_version.pages.to_i < min_page_count} if min_page_count
-
5
warehouse_book_version_ids = warehouse_book_versions.collect(&:id).uniq
-
-
10
Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_name.to_s.camelcase} Identification Complete, report generation starting -----"}
-
-
5
report_hash = EnterpriseReports.generate_report_hash("#{report_name}-identification-report-#{report_date.strftime("%m%d%y")}", client_name)
-
5
report_csv = EnterpriseReports.open_csv(report_hash)
-
-
# Append header row
-
5
header = ['Title', 'Author', 'ASIN', 'ISBN', 'Pub Date', 'Publisher']
-
5
header += ['Sold By'] if add_extra_columns
-
5
header += ['Page Count', 'Days of Data', 'Created Date', 'Total Days Since Creation' '90 Day Average Overall Rank', '30 Day Moving Average',
-
'7 Day Moving Average', 'Trendline Growth %', 'R-Squared', '90 Day Overall Rank Growth Rate']
-
5
header += ['90 Day Overall Rank Volatility', 'Apple Number of Ratings', 'BN number of Ratings'] if add_extra_columns
-
5
header += ['Amazon Number of Likes', 'Amazon Number of Ratings']
-
5
header += ['Ratings Per Day Since Published', 'Reviews Per Day Over Last 90 Days'] if add_extra_columns
-
5
header += ['Average Star Rating', '% of Ratings 4 or above', '#1 Similar Category', '#2 Similar Category', '#3 Similar Category',
-
'#1 Sub Category', '#1 Sub Category Percentage', '#2 Sub Category', '#2 Sub Category Percentage', '#3 Sub Category',
-
'#3 Sub Category Percentage', 'Current Sales Rank', 'Current Price', 'Product URL', 'Amazon Description']
-
5
report_csv << header
-
-
5
block_size = 20
-
5
(warehouse_book_version_ids.count / block_size + 1).times do |count|
-
10
Rails.logger.tagged('enterprise') {Rails.logger.info "----- Generating #{report_name.to_s.camelcase} report for #{(block_size * count)..(block_size * (count + 1) - 1)} out of #{warehouse_book_version_ids.count} book versions at #{Time.current}-----"}
-
-
WarehouseStat.select(WarehouseStat::WAREHOUSE_STAT_FIELDS + WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS).where(
-
5
warehouse_book_version_id: warehouse_book_version_ids[(block_size * count)..(block_size * (count + 1) - 1)]).where(
-
warehouse_date_id: warehouse_date_ids, warehouse_region_id: warehouse_region_id).order(:warehouse_book_version_id, :warehouse_date_id).chunk{
-
5
|el| el['warehouse_book_version_id']}.each do |warehouse_book_version_id, warehouse_stats|
-
-
warehouse_book_version = warehouse_book_versions.select {|warehouse_book_version| warehouse_book_version.id == warehouse_book_version_id}.first
-
if warehouse_stats.collect(&:amazon_sales_rank).compact.blank?
-
Rails.logger.tagged('enterprise') {Rails.logger.info "skipping warehouse_book_version #{warehouse_book_version.id}, no sales rank data"}
-
next
-
elsif warehouse_stats.count < days_of_data_min
-
Rails.logger.tagged('enterprise') {Rails.logger.info "skipping warehouse_book_version #{warehouse_book_version.id}, less than #{days_of_data_min} days of data"}
-
next
-
end
-
-
stat_count = warehouse_stats.count
-
first_stat = warehouse_stats.first
-
last_stat = warehouse_stats.last
-
created_date = WarehouseBookVersion.find_by(asin: warehouse_book_version.asin).created_at.to_date
-
total_days = (Date.current - created_date).to_i
-
-
first_stat_date = warehouse_dates.select{|warehouse_date| warehouse_date.id == first_stat.warehouse_date_id}.first.date
-
last_stat_date = warehouse_dates.select{|warehouse_date| warehouse_date.id == last_stat.warehouse_date_id}.first.date
-
row = [warehouse_book_version.title, warehouse_book_version.author_name, warehouse_book_version.asin, warehouse_book_version.isbn13,
-
warehouse_book_version.pub_date, warehouse_book_version.publisher]
-
row << warehouse_book_version.sold_by if add_extra_columns
-
row += [warehouse_book_version.pages, stat_count, created_date.to_s, total_days]
-
-
# Regression Setup
-
amazon_sales_ranks = warehouse_stats.collect(&:amazon_sales_rank).compact
-
amazon_sales_ranks_x_values = (1..amazon_sales_ranks.count).to_a
-
line_fit = LineFit.new
-
valid = line_fit.setData(amazon_sales_ranks_x_values, amazon_sales_ranks)
-
-
row << amazon_sales_ranks.mean
-
row << amazon_sales_ranks.moving_average(30).last
-
row << amazon_sales_ranks.moving_average(7).last
-
row << (valid ? ((line_fit.forecast(1) - line_fit.forecast(amazon_sales_ranks_x_values.count)) / (line_fit.forecast(1).abs) * 100).round(2).to_s + "%" : nil)
-
row << (valid ? line_fit.rSquared.round(3) : nil)
-
row << Formulas.average_growth_rate(first_stat.amazon_sales_rank, last_stat.amazon_sales_rank, last_stat_date - first_stat_date, :negative)
-
-
-
row += [amazon_sales_ranks.standard_deviation, last_stat.itunes_rating_count, last_stat.bn_review_count] if add_extra_columns
-
row += [last_stat.amazon_likes, last_stat.amazon_review_count]
-
if add_extra_columns
-
row << (last_stat.amazon_review_count / (report_date - warehouse_book_version.pub_date.to_date).to_f if last_stat.amazon_review_count.present? && warehouse_book_version.pub_date.present?)
-
row << ((last_stat.amazon_review_count - first_stat.amazon_review_count) / stat_count.to_f if first_stat.amazon_review_count.present? && last_stat.amazon_review_count.present?)
-
end
-
-
row << last_stat.amazon_average_rating
-
if last_stat.amazon_review_count.present? && (last_stat.five_star_count.present? || last_stat.four_star_count.present?)
-
top_count = (last_stat.five_star_count || 0) + (last_stat.four_star_count || 0)
-
row << ((top_count.to_f / last_stat.amazon_review_count) * 100).to_s + "%"
-
else
-
row << '0%'
-
end
-
-
# Top Similar Item Categories
-
b = Hash.new(0)
-
warehouse_stats.compact.map {|warehouse_stat| WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS.map {|key| warehouse_stat.send(key)}}.flatten.compact.map {|value| b[value] +=1}
-
values = b.sort_by{|key, value| value}.reverse.first(3).flatten.reject {|value| value.is_a?(Integer)}
-
row += EnterpriseReports.pad_serialized_data(values,3) {|value| value}
-
-
# Top Sub Categories
-
b = Hash.new(0)
-
warehouse_stats.map {|warehouse_stat| warehouse_stat.attributes.keys.select{|key| key.include?('amazon_category') && key.include?('id')}.map {|key| warehouse_stat.send(key)}}.flatten.compact.map {|value| b[value] +=1}
-
sorted_values = b.sort_by{|k, v| v}.reverse.first(3)
-
sorted_values = sorted_values.map {|x,y| [warehouse_categories.select{|c| c.id == x}.first.name, y]}
-
row += EnterpriseReports.pad_serialized_data(sorted_values,6) {|value| value.flatten.each_with_index.map {|value,i| i.odd? ? (value.to_f/stat_count*100).to_s + "%" : value}}
-
-
row += [last_stat.amazon_sales_rank, (last_stat.amazon_price / 100.0 if last_stat.amazon_price.present?), Urls.amazon_book_page(warehouse_book_version.asin, '.com')]
-
row << warehouse_book_version.amazon_book_description
-
-
report_csv << row
-
end
-
end
-
-
5
report_csv.flush
-
-
# Pass reports array to mailer and deliver
-
5
EnterpriseReports.move_to_s3(client_name, report_csv)
-
5
EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports]["#{report_name}_identification"]).deliver
-
-
5
report_csv.close
-
-
10
Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{report_name.to_s.camelcase} Identification Report Delivered -----"}
-
end
-
end
-
-
1
class SubcategoryAverageSalesRankReport
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # Builds a CSV of the average Amazon sales rank for the first 30 canonical
  # .com AmazonBookCategory categories on the given date, then uploads it to S3.
  #
  # report_date_string - a String parseable by String#to_date (e.g. "2015-01-02").
  def perform(report_date_string)
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: report_date).id
    warehouse_region_id = WarehouseRegion.com.id

    # Map of canonical .com book category id => name.
    warehouse_category_id_names = {}
    WarehouseCategory.where(tld: ".com", status: "canonical", category_type: "AmazonBookCategory").value_of(:id, :name).each {|k, v| warehouse_category_id_names[k] = v}

    client_name = :booklr
    report_hash = EnterpriseReports.generate_report_hash("subcategory-average-salesrank-report-#{report_date.strftime("%m%d%y")}", client_name)
    csv = EnterpriseReports.open_csv(report_hash)
    csv << ['category_name', 'average_sales_rank']
    warehouse_category_id_names.first(30).each do |cat_id, cat_name|
      # BUG FIX: average_sales_rank_for_date_category_region is a class method
      # (def self....), but it was previously called with an implicit receiver
      # from this instance method, which raises NoMethodError. Dispatch through
      # self.class so the existing class-level API keeps working for callers.
      csv << [cat_name, self.class.average_sales_rank_for_date_category_region(warehouse_date_id, cat_id, warehouse_region_id)]
    end
    csv.flush

    EnterpriseReports.move_to_s3(client_name, csv)
    csv.close
  end

  # Integer-truncated mean of amazon_sales_rank across stats for the book
  # versions listed in the given category on the given date/region.
  # Returns nil when no ranks are available.
  def self.average_sales_rank_for_date_category_region(warehouse_date_id, warehouse_category_id, warehouse_region_id = 1)
    sales_ranks = WarehouseStat.where(warehouse_region_id: warehouse_region_id, warehouse_date_id: warehouse_date_id, warehouse_book_version_id: WarehouseListStat.where(warehouse_category_id: warehouse_category_id, warehouse_date_id: warehouse_date_id).value_of(:warehouse_book_version_id) - [nil]).value_of(:amazon_sales_rank).compact
    # NOTE: integer division truncates the average — preserved deliberately so
    # existing report output stays stable.
    sales_ranks.sum / sales_ranks.count if sales_ranks.present?
  end
end
-
end
-
end
-
1
module EnterpriseReports
-
1
module WeeklyReports
-
1
# Weekly "Customer Behavior" report for the RHPG client: pulls this week's and
# last week's WarehouseStat rows for every ingested RHPG book version, emits
# one CSV row per book version, uploads to S3 and (conditionally) emails it.
class RHPG
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # report_date_string - String parseable by #to_date; the report week's date.
  # deliver_email      - when false, the report is generated and uploaded but
  #                      the notification email is skipped.
  def perform(report_date_string, deliver_email = true)
    report_date = report_date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: report_date).id
    warehouse_last_week_date_id = WarehouseDate.find_by(date: report_date - 7.days).id
    client_name = :rhpg
    client_config = AmazeBot.config[:reports][:clients][client_name]
    user = User.find_by email: 'rhpg@booklr.com'
    asins = user.warehouse_book_versions.ingested.value_of(:asin)

    Rails.logger.tagged('enterprise') {Rails.logger.info "----- Customer Behavior report on #{asins.count} book versions for: #{user.name} -----"}

    report_file_name = "rhpg-customer-behavior-report-#{report_date.strftime('%m%d%y')}"
    report_hash = EnterpriseReports.generate_report_hash(report_file_name, client_name)
    report_csv = EnterpriseReports.open_csv(report_hash)

    # CSV header. Column order must match the row-building code below exactly.
    report_csv << ['ISBN', 'ASIN', 'Book Title', 'Book Type', 'Pub Date', 'Author', 'B&N Number of Ratings', '# Change 1 Week', '% Change 1 Week',
                   'B&N Average Star Rating', '# Change 1 Week', '% Change 1 Week', 'AZ Number of Ratings', '# Change 1 Week',
                   '% Change 1 Week', 'AZ Average Star Rating', '# Change 1 Week', '% Change 1 Week', 'AZ Number 1 Star',
                   'AZ Number 2 Star', 'AZ Number 3 Star', 'AZ Number 4 Star', 'AZ Number 5 Star', 'AZ Number of Likes',
                   'AZ: Who Bought this Item also Bought Title 1', 'AZ: Who Bought this Item also Bought Author 1',
                   'AZ: Who Bought this Item also Bought Title 2', 'AZ: Who Bought this Item also Bought Author 2',
                   'AZ: Who Bought this Item also Bought Title 3', 'AZ: Who Bought this Item also Bought Author 3',
                   'AZ: Who Bought this Item also Bought Title 4', 'AZ: Who Bought this Item also Bought Author 4',
                   'AZ: Who Bought this Item also Bought Title 5', 'AZ: Who Bought this Item also Bought Author 5',
                   'AZ: Who Bought this Item also Bought Title 6', 'AZ: Who Bought this Item also Bought Author 6',
                   'AZ: Buy After Viewing This Item? Title 1', 'AZ: Buy After Viewing This Item? Author 1',
                   'AZ: Buy After Viewing This Item? Star Rating 1', 'AZ: Buy After Viewing This Item? Number of Ratings 1',
                   'AZ: Buy After Viewing This Item? Price 1', 'AZ: Buy After Viewing This Item? Title 2',
                   'AZ: Buy After Viewing This Item? Author 2', 'AZ: Buy After Viewing This Item? Star Rating 2',
                   'AZ: Buy After Viewing This Item? Number of Ratings 2', 'AZ: Buy After Viewing This Item? Price 2',
                   'AZ: Buy After Viewing This Item? Title 3', 'AZ: Buy After Viewing This Item? Author 3',
                   'AZ: Buy After Viewing This Item? Star Rating 3', 'AZ: Buy After Viewing This Item? Number of Ratings 3',
                   'AZ: Buy After Viewing This Item? Price 3', 'AZ: Buy After Viewing This Item? Title 4',
                   'AZ: Buy After Viewing This Item? Author 4', 'AZ: Buy After Viewing This Item? Star Rating 4',
                   'AZ: Buy After Viewing This Item? Number of Ratings 4', 'AZ: Buy After Viewing This Item? Price 4',
                   'AZ Frequently Bought Together Name 1', 'AZ Frequently Bought Together Type 1', 'AZ Frequently Bought Together Price 1',
                   'AZ Frequently Bought Together Name 2', 'AZ Frequently Bought Together Type 2', 'AZ Frequently Bought Together Price 2',
                   'Similar Items By Category 1', 'Similar Items By Category 2', 'Similar Items By Category 3', 'Similar Items By Category 4', 'Similar Items By Category 5',
                   'Similar Items By Category 6', 'Similar Items By Category 7', 'Similar Items By Category 8', 'Similar Items By Category 9', 'Similar Items By Category 10',
                   'Similar Items By Category 11', 'Similar Items By Category 12', 'Similar Items By Category 13', 'Similar Items By Category 14', 'Similar Items By Category 15']

    warehouse_book_version_ids = WarehouseBookVersion.com.where(asin: asins).value_of(:id)
    row_count = 0

    # Process book versions in batches of 1000 ids per SQL query.
    (warehouse_book_version_ids.count / 1000 + 1).times do |count|
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- Generating report for #{(1000 * count)..(1000 * (count + 1) - 1)} out of #{warehouse_book_version_ids.count} book versions -----"}
      sql = WarehouseStat.single_query_join_and_select(%w[bn_review_count bn_average_rating amazon_review_count amazon_average_rating one_star_count two_star_count three_star_count four_star_count five_star_count amazon_likes] +
                                                       WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_FIELDS.select {|field| field.include?('title') || field.include?('author')} +
                                                       WarehouseStat::WAREHOUSE_AMAZON_BOUGHT_AFTER_VIEWING_FIELDS.reject {|field| field.include?('asin')} +
                                                       WarehouseStat::WAREHOUSE_AMAZON_FREQUENTLY_BOUGHT_TOGETHER_FIELDS +
                                                       WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS,
                                                       {warehouse_book_version: %w[id isbn13 asin title book_format pub_date author_name]}, nil).where{(warehouse_stats.warehouse_book_version_id.in warehouse_book_version_ids[(1000 * count)..(1000 * (count + 1) - 1)])}.where(warehouse_region_id: WarehouseRegion.com.id, warehouse_date_id: [warehouse_date_id,warehouse_last_week_date_id]).order(:warehouse_book_version_id, :created_at).to_sql

      # Rows are ordered by book version then created_at, so chunking groups the
      # (up to two) date rows per book version: when two rows are present the
      # second is the current week and the first is last week.
      ActiveRecord::Base.connection.execute(sql).chunk {|el| el['warehouse_book_version_id']}.each do |warehouse_book_version_id, arr|
        stat = arr.count > 1 ? arr.second.with_indifferent_access : arr.first.with_indifferent_access
        last_week_stat = arr.count > 1 ? arr.first.with_indifferent_access : nil

        row = [stat[:warehouse_book_version_isbn13], stat[:warehouse_book_version_asin], stat[:warehouse_book_version_title],
               stat[:warehouse_book_version_book_format], stat[:warehouse_book_version_pub_date], stat[:warehouse_book_version_author_name]]

        # NOTE(review): when last_week_stat is absent these 12 change columns
        # are skipped entirely (row is shorter), not padded — presumably the
        # CSV consumer tolerates that; confirm.
        if last_week_stat.present?
          row += EnterpriseReports.output_field_and_change_and_percentage_change(stat, last_week_stat, :bn_review_count)
          row += EnterpriseReports.output_field_and_change_and_percentage_change(stat, last_week_stat, :bn_average_rating, true)
          row += EnterpriseReports.output_field_and_change_and_percentage_change(stat, last_week_stat, :amazon_review_count)
          row += EnterpriseReports.output_field_and_change_and_percentage_change(stat, last_week_stat, :amazon_average_rating, true)
        end

        row += [stat[:one_star_count], stat[:two_star_count], stat[:three_star_count], stat[:four_star_count],
                stat[:five_star_count], stat[:amazon_likes]]

        row += EnterpriseReports.output_customer_behavior_fields(stat, 'amazon_also_bought')
        row += EnterpriseReports.output_customer_behavior_fields(stat, 'bought_after_viewing')
        row += EnterpriseReports.output_customer_behavior_fields(stat, 'frequently_bought')
        row += EnterpriseReports.output_customer_behavior_fields(stat, 'similar_item')

        report_csv << row
        row_count += 1
      end
    end

    report_csv.flush

    # Upload finished report whether it's complete or not
    EnterpriseReports.move_to_s3(client_name, report_csv)

    # Determine if report is complete and then email about it
    if EnterpriseReports.report_count_valid? row_count, asins.count
      EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:customer_behavior]).deliver if deliver_email
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{user.name.parameterize} Weekly Report Delivered -----"}
    else
      EnterpriseReports.send_report_count_error report_file_name, row_count, asins.count
      Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{user.name.parameterize}(#{client_name}) Weekly Report NOT Delivered: row count off by 0.5% of more -----"}
    end

    report_csv.close
  end
end
-
-
1
# Weekly "all customer behavior" v2 report for the RHINC client: builds a
# single SELECT over WarehouseStat joined to book-version and author-rank
# tables, then streams it straight to a gzipped CSV via COPY and delivers it.
class RhincCustomerBehavior
  include EnterpriseReports
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # report_date_string   - String parseable by #to_date.
  # ftp                  - forwarded to the delivery helper.
  # deliver_email        - when false, no emailable report name is passed on.
  # ignore_report_blocker - when true, skips the expected-row-count guard.
  def perform(report_date_string, ftp = true, deliver_email = true, ignore_report_blocker = false)
    ActiveRecord::Base.connection.uncached do
      report_date = report_date_string.to_date

      # Base stat columns plus the v1 customer-behavior field groups.
      base_selects = %w[bn_review_count bn_average_rating amazon_review_count amazon_average_rating
                        one_star_count two_star_count three_star_count four_star_count five_star_count amazon_likes]
      base_selects += WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_FIELDS
      base_selects += WarehouseStat::WAREHOUSE_AMAZON_BOUGHT_AFTER_VIEWING_FIELDS
      base_selects += WarehouseStat::WAREHOUSE_AMAZON_FREQUENTLY_BOUGHT_TOGETHER_FIELDS
      base_selects += WarehouseStat::WAREHOUSE_AMAZON_SIMILAR_ITEM_CATEGORY_TREE_FIELDS

      # v2-only field groups.
      v2_selects = WarehouseStat::WAREHOUSE_AMAZON_ALSO_BOUGHT_ITEMS_BY_FIELDS + WarehouseStat::WAREHOUSE_BN_ALSO_BOUGHT_FIELDS

      # Author-rank columns: prefer the joined category name, fall back to the
      # raw fallback column stored on the stat row.
      author_rank_selects = ['overall_author_rank AS "amazon_overall_author_rank", ',
                            'COALESCE(sub_category1_author_rank_id.name, sub_category1_author_rank_id_fallback) AS "amazon_sub_category1_author_rank_id_name", sub_category1_author_rank AS "amazon_sub_category1_author_rank", ',
                            'COALESCE(sub_category2_author_rank_id.name, sub_category2_author_rank_id_fallback) AS "amazon_sub_category2_author_rank_id_name", sub_category2_author_rank AS "amazon_sub_category2_author_rank", ',
                            'COALESCE(sub_category3_author_rank_id.name, sub_category3_author_rank_id_fallback) AS "amazon_sub_category3_author_rank_id_name", sub_category3_author_rank AS "amazon_sub_category3_author_rank", ',
                            'COALESCE(sub_category4_author_rank_id.name, sub_category4_author_rank_id_fallback) AS "amazon_sub_category4_author_rank_id_name", sub_category4_author_rank AS "amazon_sub_category4_author_rank"'].join

      # Build the relation step by step; the select/join order determines the
      # CSV column order, so it must stay exactly as below.
      relation = WarehouseStat.joins(:warehouse_book_version)
                              .outer_joins(:sub_category1_author_rank_id, :sub_category2_author_rank_id, :sub_category3_author_rank_id, :sub_category4_author_rank_id)
                              .transforming_select(true, base_selects)
                              .join_select('inner', false, warehouse_book_version: %w[title book_format asin isbn13 bn_id author_name])
                              .transforming_select(true, v2_selects)
                              .select(author_rank_selects)
                              .where(warehouse_region_id: WarehouseRegion.com.id, warehouse_date_id: WarehouseDate.find_by(date: report_date).id)
                              .order(:warehouse_book_version_id)

      expected_count = ignore_report_blocker ? nil : WarehouseBookVersion.com.ingested.count
      report_name = "rhinc-all-customer-behavior-v2-#{report_date.strftime('%m%d%y')}"
      sql_copy_to_csv_and_deliver_report(relation.to_sql, :rhinc, report_name, expected_count, ftp: ftp, emailable_report_name: (deliver_email ? :customer_behavior : nil), gzip: true)
    end
  end
end
-
-
1
# Daily German competitive-titles report for the RHDE client. Reads the day's
# Mongo collection of scraped title families (RHDE titles + competitive titles
# grouped by WorkID/parent ASIN), writes one CSV row per title, appends any
# "broken titles" at the end, uploads to S3 and emails the report.
class RHDECompetitiveMongoReport
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # report_date_string - String parseable by #to_date; defaults to today.
  # deliver_email      - when false, upload still happens but no email is sent.
  def perform(report_date_string = Date.current.to_s, deliver_email = true)
    report_date = report_date_string.to_date
    client_name = :rhde
    client_config = AmazeBot.config[:reports][:clients][client_name]

    user = User.find_by email: 'rhde@booklr.com'

    Rails.logger.tagged('enterprise') {Rails.logger.info "----- German Competitive Report for: #{user.name} -----"}

    report_file_name = "daily-competitive-titles-report-#{report_date.strftime('%m%d%y')}-#{user.name.parameterize}"
    report_hash = EnterpriseReports.generate_report_hash(report_file_name, client_name)
    csv = EnterpriseReports.open_csv(report_hash)

    # CSV header; row-building below must stay in this column order.
    csv << ['ISBN13', 'WorkID', 'ASIN', 'Title', 'Author', 'Publisher', 'OSD', 'Format', 'Size', 'List Price', 'Digital List Price', 'Consumer Price',
            'Rank', 'Language', 'Competitive?', 'Featured?', 'Category 1', 'Category 1 rank', 'Category 2', 'Category 2 rank', 'Category 3',
            'Category 3 rank', 'Scraped On Date']

    row_count = 0

    collection = MongoUtilities.daily_collection(:de_competitive_format_data, report_date)
    block_size = 1000

    # Iterate over the collection with no timeout and do it in slices for optimized memory usage
    # ignore broken_titles document and iterate over those separately at the end
    collection.find({_id: {'$ne' => 'broken_titles'}}, timeout: false) do |cursor|
      total = collection.count
      i = 1
      cursor.each_slice(block_size) do |slice|
        Rails.logger.tagged('enterprise') {Rails.logger.info "#{i*block_size} out of #{total}"}
        i += 1

        # Each document (per slice) represents all data for a single parent asin/ WorkID. Each document has 2 arrays
        # of hashes, one for RHDE titles and one for the competitive titles
        slice.each do |title_family|
          # combine both array of hashes and output to the csv
          titles = []
          titles += title_family['rhde_titles'] if title_family['rhde_titles'].present?
          # Competitive titles get tagged so the 'Competitive?' column below is 'Y'.
          titles += title_family['competitive_titles'].map {|x| x.merge!('competitive' => true)} if title_family['competitive_titles'].present?
          parent_asin = title_family['_id'].split('-').first

          titles.each do |title|
            pub_date = ScraperUtilities.parse_date_string(title['pub_date']) || 'invalid'

            # 2nd column is WorkID unless it doesn't exist then we use the parent_asin to tie titles together
            # Prices are stored in cents; .try chains convert to decimal units and
            # pass nil through for missing values.
            row = [title['isbn13'], (title_family['WorkID'] || parent_asin), title['asin'], title['title'],
                   title['author'], title['publisher'], pub_date.to_s, title['book_format'], title['physical_details'],
                   title['amazon_list_price'].try(:to_i).try(:/, 100.0), title['digital_list_price'].try(:to_i).try(:/, 100.0),
                   title['amazon_price'].try(:to_i).try(:/, 100.0), title['amazon_sales_rank'], title['language']]
            row << (title['competitive'] ? 'Y' : 'N')
            # title_family["featured"] is an array of all of the featured asins for this WorkID/parent asin
            row << (title_family['featured'].include?(title['asin']) ? 'Y' : 'N')
            row += [title['sub_category1_tree'], title['sub_category1_rank'], title['sub_category2_tree'], title['sub_category2_rank'], title['sub_category3_tree'], title['sub_category3_rank'], Date.current.to_s]

            # Transliterate strings to ASCII for the CSV consumer.
            csv << row.collect {|value| value.is_a?(String) ? ActiveSupport::Inflector.transliterate(value) : value}
            row_count += 1
          end
        end
      end
    end

    if collection.find({_id: 'broken_titles'}).count > 0
      csv << ['----------- BROKEN TITLES -----------']

      row_count += 1
      i = 1
      total = collection.find({_id: 'broken_titles'}).first['titles'].count

      # Iterate over broken titles and output as much data is available from the page
      collection.find({_id: 'broken_titles'}).first['titles'].each do |title|
        Rails.logger.tagged('enterprise') {Rails.logger.info "#{i} out of #{total}"} if i % 1000 == 0
        i += 1
        # NOTE(review): `.to_s || 'invalid'` — to_s never returns nil, so the
        # 'invalid' fallback here is unreachable (unlike the main loop above,
        # which applies || before to_s); verify intended behavior.
        pub_date = title['pub_date'].present? ? (ScraperUtilities.parse_date_string(title['pub_date']).to_s || 'invalid') : ''

        # Always has N/A on the featured title column, broken titles can never be featured
        row = [title['isbn13'], title['work_id'], title['asin'], title['title'], title['author'], title['publisher'],
               pub_date, title['book_format'], title['physical_details'], title['amazon_list_price'].try(:to_i).try(:/, 100.0),
               title['digital_list_price'].try(:to_i).try(:/, 100.0), title['amazon_price'].try(:to_i).try(:/, 100.0),
               title['amazon_sales_rank'], title['language'], 'N', 'N/A', title['sub_category1_tree'], title['sub_category1_rank'],
               title['sub_category2_tree'], title['sub_category2_rank'], title['sub_category3_tree'],
               title['sub_category3_rank'], Date.current.to_s].collect {|value| value.is_a?(String) ? ActiveSupport::Inflector.transliterate(value) : value}

        csv << row
        row_count += 1
      end
    end

    csv.flush

    # Upload finished report whether it's complete or not
    EnterpriseReports.move_to_s3(client_name, csv)

    # Set redis details and mail report
    $redis.hmset('daily_report_stats', 'rhde-row-count', row_count, 'rhde-send-time', Time.current.to_s)
    EnterpriseReportsMailer.basic_report(report_hash, client_config[:reports][:daily_report]).deliver if deliver_email
    Rails.logger.tagged('enterprise') {Rails.logger.info "----- #{user.name.parameterize} German Competitive Report Delivered -----"}

    csv.close
  end
end
-
end
-
end
-
1
module EtlWorkers
-
1
# Entry point for the nightly ETL pipeline: resolves (or creates) the
# WarehouseDate row for the target date, then kicks off the stats ETL followed
# by the list-stat ETL for that date.
class QueueNightlyEtl
  include Sidekiq::Worker
  sidekiq_options queue: :high

  # date_string - String parseable by #to_date; defaults to today.
  def perform(date_string = Date.current.to_s)
    etl_date = date_string.to_date
    date_id = WarehouseDate.where(date: etl_date).first_or_create.id

    # Stats ETL runs first with all three stat flags enabled, then list stats.
    EtlWorkers::NightlyStatsEtl.process_date(etl_date, date_id, true, true, true)
    EtlWorkers::NightlyListStatEtl.process_date(etl_date, date_id)
  end
end
-
-
1
class NightlyStatsEtl
-
1
include Sidekiq::Worker
-
1
sidekiq_options queue: :etl
-
-
1
BATCH_SIZE = 100000
-
-
1
# Streams one id-range slice of the nightly Mongo scrape collection into the
# warehouse via PostgreSQL COPY. Depending on the three etl_* flags it writes
# warehouse_stats rows, book_version_categories rows, and/or accumulates
# in-memory category stats, plus per-field scrape counts back into Mongo.
#
# collection_name          - Mongo collection holding the day's scraped records.
# starting_id / ending_id  - inclusive warehouse_book_version_id range to process.
# warehouse_region_com_id / warehouse_region_co_uk_id - region ids chosen per record tld.
# warehouse_date_id        - WarehouseDate id the stats are written under.
# date_string              - the date as a string, used as the Mongo counts key.
# etl_product_stats / etl_book_version_categories / etl_category_stats - feature flags.
def perform(collection_name, starting_id, ending_id, warehouse_region_com_id, warehouse_region_co_uk_id, warehouse_date_id, date_string, etl_product_stats, etl_book_version_categories, etl_category_stats)
  t = Benchmark.realtime do
    WarehouseRegion.connection.uncached do
      collection = $mongodb.collection collection_name
      existing_record = collection.find({warehouse_book_version_id: {'$gte' => starting_id, '$lte' => ending_id}}).limit(1).first
      # Idempotency guard: skip the whole slice if its first record already has
      # a stat row for this date. NOTE: this `return` exits perform from inside
      # the Benchmark block, so the timing log below is skipped too.
      return if etl_product_stats && existing_record.present? && WarehouseStat.where(warehouse_date_id: warehouse_date_id, warehouse_book_version_id: existing_record['warehouse_book_version_id']).exists?

      timestamp = Time.current.utc
      errmsg = stat_connection = categories_connection = category_stats = nil
      # Per-tld lookup of canonical category name => id.
      # NOTE(review): the inner block param `hash` shadows the outer one; works,
      # but rename would be clearer.
      warehouse_categories_by_tld = Utilities::TLDS.each_with_object({}) do |tld, hash|
        hash[tld] = WarehouseCategory.canonical.where(tld: tld).value_of(:name, :id).each_with_object({}) {|name_and_id, hash| hash[name_and_id[0]] = name_and_id[1]}
      end
      if etl_product_stats
        # Open a raw COPY stream for warehouse_stats; rows are pushed in
        # process_record_for_warehouse_stats and the stream is finalized below.
        stat_connection = WarehouseStat.connection.raw_connection
        stat_connection.exec("COPY #{WarehouseStat.table_name} (#{warehouse_stats_columns.join(',')}) FROM STDIN CSV NULL '' QUOTE '\"' ESCAPE '\\'")

        # Per-column non-null counters, plus two aggregate "any related format
        # data present" counters.
        field_counts = Hash[*warehouse_stats_columns.collect{|k| [k, 0]}.flatten]
        field_counts['bn_related_format_data_total'] = 0
        field_counts['amazon_related_format_data_total'] = 0
      end
      if etl_book_version_categories
        # Separate connection so this COPY stream is independent of the stats one.
        categories_connection = PostgresUtilities.get_new_connection(BookVersionCategory)
        categories_connection.exec("COPY #{BookVersionCategory.table_name} (#{book_version_categories_columns.join(',')}) FROM STDIN CSV NULL '' QUOTE '\"' ESCAPE '\\'")
      end
      category_stats = {} if etl_category_stats

      begin
        collection.find({warehouse_book_version_id: {'$gte' => starting_id, '$lte' => ending_id}}, timeout: false) do |cursor|
          cursor.each do |record|
            warehouse_region_id = record['tld'] == '.com' ? warehouse_region_com_id : warehouse_region_co_uk_id
            warehouse_category_ids_by_name = warehouse_categories_by_tld[record['tld']]

            if etl_product_stats
              stats = process_record_for_warehouse_stats stat_connection, record, warehouse_category_ids_by_name, warehouse_region_id, warehouse_date_id, timestamp

              # Calculate booklr stat counts
              bn_related_format_present = false
              amazon_related_format_present = false
              warehouse_stats_columns.zip(stats).each do |field, stat|
                if stat.present?
                  field_counts[field] += 1
                  bn_related_format_present = true if %w[related_formats_nook_ean related_formats_nook_price].include? field
                  amazon_related_format_present = true if %w[related_formats_kindle_price related_formats_kindle_asin related_formats_hardcover_price related_formats_hardcover_asin related_formats_mass_market_paperback_price related_formats_mass_market_paperback_asin related_formats_paperback_price related_formats_paperback_asin].include? field
                end
              end
              field_counts['bn_related_format_data_total'] += 1 if bn_related_format_present
              field_counts['amazon_related_format_data_total'] += 1 if amazon_related_format_present
            end

            process_record_for_book_version_categories categories_connection, record, warehouse_category_ids_by_name, warehouse_region_id if etl_book_version_categories
            process_record_for_category_stats record, category_stats, warehouse_category_ids_by_name, warehouse_region_id if etl_category_stats
          end

          if etl_product_stats
            # Accumulate this slice's counters into the daily Mongo counts doc.
            stat_collection = MongoUtilities.daily_scrape_field_counts_collection
            stat_collection.update({date: date_string}, {'$inc' => field_counts})
          end
        end

        if etl_category_stats && category_stats.present?
          CategoryStatsCollection.new(date_string.to_date).add_category_stats category_stats
        end
      rescue Errno => err
        # Pattern from the pg gem's COPY example: capture the error message so
        # finalize_copy_command can abort the COPY stream with it.
        # NOTE(review): rescuing the Errno module may not match SystemCallError
        # subclasses in all Ruby versions — confirm against the pg gem docs.
        errmsg = '%s while reading copy data: %s' % [err.class.name, err.message]
      end

      # Always close the COPY streams (with the error message, if any).
      PostgresUtilities.finalize_copy_command stat_connection, errmsg if etl_product_stats
      PostgresUtilities.finalize_copy_command categories_connection, errmsg if etl_book_version_categories
    end
  end

  Rails.logger.tagged('stats') {Rails.logger.info "NightlyStatsEtl Time taken: #{t}"}
end
-
-
1
# Maps one scraped Mongo record onto a warehouse_stats COPY row. The positional
# order of `stats` must match warehouse_stats_columns exactly — do not reorder.
# Writes the CSV-escaped row to the open COPY stream and returns the raw stats
# array (used by the caller to tally per-column counts).
#
# connection - raw PG connection with an active COPY ... FROM STDIN.
# record     - the Mongo document for one book version scrape.
# warehouse_category_ids_by_name - canonical category name => id for the record's tld.
def process_record_for_warehouse_stats(connection, record, warehouse_category_ids_by_name, warehouse_region_id, warehouse_date_id, timestamp)
  # Fixed scalar columns: timestamps, ids, ranks, prices (cents), social counts,
  # ratings. prepare_*_for_copy helpers handle escaping/nil for the CSV stream.
  stats = [timestamp, timestamp, Utilities.prepare_string_for_copy(record['_id']),
           warehouse_category_ids_by_name[record['sub_category1_tree']], Utilities.prepare_integer_for_copy(record['sub_category1_rank']),
           warehouse_category_ids_by_name[record['sub_category2_tree']], Utilities.prepare_integer_for_copy(record['sub_category2_rank']),
           warehouse_category_ids_by_name[record['sub_category3_tree']], Utilities.prepare_integer_for_copy(record['sub_category3_rank']),
           record['book_version_stat_id'], warehouse_date_id, warehouse_region_id, record['warehouse_book_version_id'],
           warehouse_category_ids_by_name[record['amazon_sales_rank_category']], Utilities.prepare_integer_for_copy(record['amazon_sales_rank']),
           Utilities.prepare_integer_for_copy(record['barnes_sales_rank']), Utilities.prepare_integer_for_copy(record['likes']),
           Utilities.prepare_integer_for_copy(record['amazon_list_price']), Utilities.prepare_integer_for_copy(record['amazon_price']),
           Utilities.prepare_integer_for_copy(record['digital_list_price']), Utilities.prepare_integer_for_copy(record['bn_nook_price']),
           Utilities.prepare_integer_for_copy(record['bn_nook_list_price']), Utilities.prepare_integer_for_copy(record['amazon_euro_price']),
           Utilities.prepare_integer_for_copy(record['amazon_aus_price']), Utilities.prepare_integer_for_copy(record['bn_price']),
           Utilities.prepare_integer_for_copy(record['bn_list_price']), Utilities.prepare_integer_for_copy(record['itunes_price']),
           Utilities.prepare_float_for_copy(record['itunes_average_rating']), Utilities.prepare_integer_for_copy(record['itunes_rating_count']),
           Utilities.prepare_integer_for_copy(record['stumbleupon_count']), Utilities.prepare_integer_for_copy(record['reddit_count']),
           Utilities.prepare_integer_for_copy(record['fb_commentsbox_count']), Utilities.prepare_integer_for_copy(record['fb_click_count']),
           Utilities.prepare_integer_for_copy(record['fb_comment_count']), Utilities.prepare_integer_for_copy(record['fb_like_count']),
           Utilities.prepare_integer_for_copy(record['fb_share_count']), Utilities.prepare_integer_for_copy(record['delicious_count']),
           Utilities.prepare_integer_for_copy(record['google_plus_count']), Utilities.prepare_integer_for_copy(record['twitter_count']),
           Utilities.prepare_integer_for_copy(record['digg_count']), Utilities.prepare_integer_for_copy(record['pinterest_count']),
           Utilities.prepare_integer_for_copy(record['linkedin_count']), Utilities.prepare_float_for_copy(record['amazon_average_rating']),
           Utilities.prepare_integer_for_copy(record['amazon_review_count']), Utilities.prepare_float_for_copy(record['barnes_average_rating']),
           Utilities.prepare_integer_for_copy(record['barnes_review_count']), Utilities.prepare_string_for_copy(record['amazon_availability']),
           record['kindle_unlimited'], Utilities.prepare_float_for_copy(record['goodreads_work_average_rating']),
           Utilities.prepare_integer_for_copy(record['goodreads_work_rating_count']), Utilities.prepare_integer_for_copy(record['goodreads_work_review_count']),
           Utilities.prepare_integer_for_copy(record['goodreads_work_added_by_count']), Utilities.prepare_integer_for_copy(record['goodreads_work_to_read_count']),
           Utilities.prepare_float_for_copy(record['goodreads_edition_average_rating']), Utilities.prepare_integer_for_copy(record['goodreads_edition_rating_count']),
           Utilities.prepare_integer_for_copy(record['goodreads_edition_review_count']), Utilities.prepare_integer_for_copy(record['goodreads_edition_added_by_count']),
           Utilities.prepare_integer_for_copy(record['goodreads_5_star_count']), Utilities.prepare_integer_for_copy(record['goodreads_4_star_count']),
           Utilities.prepare_integer_for_copy(record['goodreads_3_star_count']), Utilities.prepare_integer_for_copy(record['goodreads_2_star_count']),
           Utilities.prepare_integer_for_copy(record['goodreads_1_star_count']),
           Utilities.prepare_integer_for_copy(record['five_star_count']), Utilities.prepare_integer_for_copy(record['four_star_count']),
           Utilities.prepare_integer_for_copy(record['three_star_count']), Utilities.prepare_integer_for_copy(record['two_star_count']),
           Utilities.prepare_integer_for_copy(record['one_star_count']), Utilities.prepare_float_for_copy(record['itunes_gb_average_rating']),
           Utilities.prepare_integer_for_copy(record['itunes_gb_rating_count']), Utilities.prepare_integer_for_copy(record['itunes_gb_price']),
           Utilities.prepare_float_for_copy(record['itunes_au_average_rating']), Utilities.prepare_integer_for_copy(record['itunes_au_rating_count']),
           Utilities.prepare_integer_for_copy(record['itunes_au_price'])]
  # Fallback category-name columns: populated only when the scraped category
  # name did not resolve to a canonical warehouse category id above.
  stats += [(warehouse_category_ids_by_name[record['amazon_sales_rank_category']].present? ? nil : Utilities.prepare_string_for_copy(record['amazon_sales_rank_category'])),
            (warehouse_category_ids_by_name[record['sub_category1_tree']].present? ? nil : Utilities.prepare_string_for_copy(record['sub_category1_tree'])),
            (warehouse_category_ids_by_name[record['sub_category2_tree']].present? ? nil : Utilities.prepare_string_for_copy(record['sub_category2_tree'])),
            (warehouse_category_ids_by_name[record['sub_category3_tree']].present? ? nil : Utilities.prepare_string_for_copy(record['sub_category3_tree']))]
  # "Also bought items by" author columns 1-16.
  stats += [Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_1']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_2']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_3']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_4']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_5']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_6']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_7']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_8']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_9']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_10']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_11']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_12']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_13']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_14']),
            Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_15']), Utilities.prepare_string_for_copy(record['amazon_also_bought_items_by_16'])]
  # Up to 6 "also bought" items, 6 columns each, padded to 36 columns.
  # NOTE(review): star_count[0..-16] appears to strip a fixed-length suffix from
  # the scraped string — confirm against the scraper output format.
  stats += Utilities.pad_customer_behavior_data(record['also_bought'].try(:first, 6), 36) do |column_values, data, index|
    column_values[index * 6] = Utilities.prepare_string_for_copy(data['isbn_or_asin'])
    column_values[index * 6 + 1] = Utilities.prepare_string_for_copy(data['author'])
    column_values[index * 6 + 2] = Utilities.prepare_integer_for_copy(Utilities.ignore_bad_price_for_copy data['price'])
    column_values[index * 6 + 3] = Utilities.prepare_integer_for_copy(data['rating_count'])
    column_values[index * 6 + 4] = Utilities.prepare_float_for_copy(data['star_count'].try(:[], 0..-16))
    column_values[index * 6 + 5] = Utilities.prepare_string_for_copy(data['title'])
  end
  # Up to 4 "bought after viewing" items, 6 columns each, padded to 24 columns.
  stats += Utilities.pad_customer_behavior_data(record['bought_after_viewing'].try(:first, 4), 24) do |column_values, data, index|
    column_values[index * 6] = Utilities.prepare_string_for_copy(data['isbn_or_asin'])
    column_values[index * 6 + 1] = Utilities.prepare_string_for_copy(data['author'])
    column_values[index * 6 + 2] = Utilities.prepare_integer_for_copy(Utilities.ignore_bad_price_for_copy data['price'])
    column_values[index * 6 + 3] = Utilities.prepare_integer_for_copy(data['rating_count'])
    column_values[index * 6 + 4] = Utilities.prepare_float_for_copy(data['star_count'].try(:[], 0..-16))
    column_values[index * 6 + 5] = Utilities.prepare_string_for_copy(data['title'])
  end
  # Up to 2 "frequently bought together" items (the "This item: ..." self entry
  # is filtered out), 3 columns each, padded to 6 columns. Note the first block
  # here binds to try(:reject).
  stats += Utilities.pad_customer_behavior_data(record['frequently_bought_together'].try(:reject) {|x| x['title'].include? 'This item: '}.try(:first, 2), 6) do |column_values, data, index|
    column_values[index * 3] = Utilities.prepare_string_for_copy(data['type'])
    column_values[index * 3 + 1] = Utilities.prepare_integer_for_copy(data['price'])
    column_values[index * 3 + 2] = Utilities.prepare_string_for_copy(data['title'])
  end
  # Up to 15 similar-item category names, padded to 15 columns.
  stats += Utilities.pad_customer_behavior_data(record['similar_items_by_category'].try(:first, 15), 15) do |column_values, data, index|
    column_values[index] = Utilities.prepare_string_for_copy(data)
  end
  # Up to 15 similar-item category external ids, padded to 15 columns.
  stats += Utilities.pad_customer_behavior_data(record['similar_items_by_category_external_id'].try(:first, 15), 15) do |column_values, data, index|
    column_values[index] = Utilities.prepare_string_for_copy(data)
  end
  # Up to 6 B&N "also bought" items, 4 columns each, padded to 24 columns.
  stats += Utilities.pad_customer_behavior_data(record['bn_also_bought'].try(:first, 6), 24) do |column_values, data, index|
    column_values[index * 4] = Utilities.prepare_string_for_copy(data['ean'])
    column_values[index * 4 + 1] = Utilities.prepare_string_for_copy(data['author'])
    column_values[index * 4 + 2] = Utilities.prepare_integer_for_copy(data['price'])
    column_values[index * 4 + 3] = Utilities.prepare_string_for_copy(data['title'])
  end
  # Related-format price/id pairs per edition type.
  stats += [Utilities.prepare_integer_for_copy(record['Kindle Edition'].try(:[], 'price')), Utilities.prepare_string_for_copy(record['Kindle Edition'].try(:[], 'asin')),
            Utilities.prepare_integer_for_copy(record['MassMarketPaperback'].try(:[], 'price')), Utilities.prepare_string_for_copy(record['MassMarketPaperback'].try(:[], 'asin')),
            Utilities.prepare_integer_for_copy(record['NOOK Book'].try(:[], 'price')), Utilities.prepare_string_for_copy(record['NOOK Book'].try(:[], 'ean')),
            Utilities.prepare_integer_for_copy(record['Hardcover'].try(:[], 'price')), Utilities.prepare_string_for_copy(record['Hardcover'].try(:[], 'asin')),
            Utilities.prepare_integer_for_copy(record['Paperback'].try(:[], 'price')), Utilities.prepare_string_for_copy(record['Paperback'].try(:[], 'asin'))]

  # Author-rank columns: overall rank, four sub-category ranks, four resolved
  # category ids, and four fallback category names (only when unresolved).
  # 13 columns total; all nil when the record carries no author_ranks.
  if record['author_ranks'].present?
    sub_category_1 = record['author_ranks']['sub_category_1']
    sub_category_2 = record['author_ranks']['sub_category_2']
    sub_category_3 = record['author_ranks']['sub_category_3']
    sub_category_4 = record['author_ranks']['sub_category_4']
    stats += [Utilities.prepare_integer_for_copy(record['author_ranks']['overall_rank']),
              (Utilities.prepare_integer_for_copy(sub_category_1['rank']) if sub_category_1.present?),
              (Utilities.prepare_integer_for_copy(sub_category_2['rank']) if sub_category_2.present?),
              (Utilities.prepare_integer_for_copy(sub_category_3['rank']) if sub_category_3.present?),
              (Utilities.prepare_integer_for_copy(sub_category_4['rank']) if sub_category_4.present?),
              (warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_1['category_name'])] if sub_category_1.present?),
              (warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_2['category_name'])] if sub_category_2.present?),
              (warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_3['category_name'])] if sub_category_3.present?),
              (warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_4['category_name'])] if sub_category_4.present?),
              (sub_category_1.blank? || warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_1['category_name'])].present? ? nil : Utilities.prepare_string_for_copy(ScraperUtilities.convert_author_rank_category_name(sub_category_1['category_name']))),
              (sub_category_2.blank? || warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_2['category_name'])].present? ? nil : Utilities.prepare_string_for_copy(ScraperUtilities.convert_author_rank_category_name(sub_category_2['category_name']))),
              (sub_category_3.blank? || warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_3['category_name'])].present? ? nil : Utilities.prepare_string_for_copy(ScraperUtilities.convert_author_rank_category_name(sub_category_3['category_name']))),
              (sub_category_4.blank? || warehouse_category_ids_by_name[ScraperUtilities.convert_author_rank_category_name(sub_category_4['category_name'])].present? ? nil : Utilities.prepare_string_for_copy(ScraperUtilities.convert_author_rank_category_name(sub_category_4['category_name'])))]
  else
    stats += Array.new(13)
  end

  # Push the finished row into the COPY stream (nil renders as empty == NULL).
  connection.put_copy_data("#{stats.join(',')}\n")

  stats
end
-
-
1
# Streams one book_version_categories COPY row per similar-items category on
# the record, returning the rows that were written (nil when the record has
# no categories).
#
# connection - a raw PG connection already inside a COPY ... FROM STDIN.
# record - a Mongo stats document (reads 'similar_items_by_category' and
#          'warehouse_book_version_id').
def process_record_for_book_version_categories(connection, record, warehouse_category_ids_by_name, warehouse_region_id)
  category_names = record['similar_items_by_category']
  return unless category_names.present?

  category_names.map do |category_name|
    copy_row = [record['warehouse_book_version_id'],
                Utilities.prepare_string_for_copy(category_name),
                warehouse_category_ids_by_name[category_name],
                warehouse_region_id]
    connection.put_copy_data(copy_row.join(',') + "\n")
    copy_row
  end
end
-
-
1
# Folds one record's Amazon sales rank into the running per-category stats
# accumulator. For each category on the record it tracks best/worst rank, the
# book versions holding those ranks, and the full list of ranks seen.
#
# category_stats - mutable hash keyed by category name with '.' stripped
#                  (dots are removed because the key is stored in Mongo).
def process_record_for_category_stats(record, category_stats, warehouse_category_ids_by_name, warehouse_region_id)
  rank = Utilities.prepare_integer_for_copy(record['amazon_sales_rank'])
  category_names = record['similar_items_by_category']
  return unless rank.present? && category_names.present?

  category_names.map do |category_name|
    key = category_name.gsub('.', '')
    entry = (category_stats[key] ||= {best_rank: rank, worst_rank: rank, ranks: [],
                                      category_name: category_name,
                                      warehouse_region_id: warehouse_region_id,
                                      warehouse_category_id: warehouse_category_ids_by_name[category_name],
                                      best_rank_book_version_id: record['warehouse_book_version_id'],
                                      worst_rank_book_version_id: record['warehouse_book_version_id']})
    if rank < entry[:best_rank]
      entry[:best_rank] = rank
      entry[:best_rank_book_version_id] = record['warehouse_book_version_id']
    end
    if rank > entry[:worst_rank]
      entry[:worst_rank] = rank
      entry[:worst_rank_book_version_id] = record['warehouse_book_version_id']
    end
    entry[:ranks] << rank
  end
end
-
-
1
# Column order for the warehouse stats COPY statement. The position of each
# name must match the position of the corresponding value in the `stats`
# array built by the perform method — do not reorder without updating both.
def warehouse_stats_columns
  %w[created_at updated_at mongo_id
     warehouse_amazon_category1_id amazon_category1_rank
     warehouse_amazon_category2_id amazon_category2_rank
     warehouse_amazon_category3_id amazon_category3_rank
     tx_book_version_stat_id warehouse_date_id warehouse_region_id warehouse_book_version_id
     warehouse_amazon_sales_rank_category_id amazon_sales_rank bn_sales_rank amazon_likes amazon_list_price
     amazon_price amazon_digital_list_price bn_nook_price bn_nook_list_price amazon_euro_price
     amazon_aus_price bn_price bn_list_price itunes_price itunes_average_rating itunes_rating_count
     stumbleupon_count reddit_count fb_commentsbox_count fb_click_count fb_comment_count fb_like_count
     fb_share_count delicious_count google_plus_count twitter_count digg_count pinterest_count
     linkedin_count amazon_average_rating amazon_review_count bn_average_rating bn_review_count
     amazon_availability kindle_unlimited
     goodreads_work_average_rating goodreads_work_rating_count goodreads_work_review_count
     goodreads_work_added_by_count goodreads_work_to_read_count goodreads_edition_average_rating
     goodreads_edition_rating_count goodreads_edition_review_count goodreads_edition_added_by_count
     goodreads_5_star_count goodreads_4_star_count goodreads_3_star_count goodreads_2_star_count goodreads_1_star_count
     five_star_count four_star_count three_star_count two_star_count one_star_count
     itunes_gb_average_rating itunes_gb_rating_count itunes_gb_price
     itunes_au_average_rating itunes_au_rating_count itunes_au_price
     warehouse_amazon_sales_rank_category_id_fallback warehouse_amazon_category1_id_fallback
     warehouse_amazon_category2_id_fallback warehouse_amazon_category3_id_fallback
     amazon_also_bought_items_by_1 amazon_also_bought_items_by_2 amazon_also_bought_items_by_3
     amazon_also_bought_items_by_4 amazon_also_bought_items_by_5 amazon_also_bought_items_by_6
     amazon_also_bought_items_by_7 amazon_also_bought_items_by_8 amazon_also_bought_items_by_9
     amazon_also_bought_items_by_10 amazon_also_bought_items_by_11 amazon_also_bought_items_by_12
     amazon_also_bought_items_by_13 amazon_also_bought_items_by_14 amazon_also_bought_items_by_15
     amazon_also_bought_items_by_16
     amazon_also_bought_asin_1 amazon_also_bought_author_1 amazon_also_bought_price_1
     amazon_also_bought_rating_1 amazon_also_bought_star_count_1 amazon_also_bought_title_1
     amazon_also_bought_asin_2 amazon_also_bought_author_2 amazon_also_bought_price_2
     amazon_also_bought_rating_2 amazon_also_bought_star_count_2 amazon_also_bought_title_2
     amazon_also_bought_asin_3 amazon_also_bought_author_3 amazon_also_bought_price_3
     amazon_also_bought_rating_3 amazon_also_bought_star_count_3 amazon_also_bought_title_3
     amazon_also_bought_asin_4 amazon_also_bought_author_4 amazon_also_bought_price_4
     amazon_also_bought_rating_4 amazon_also_bought_star_count_4 amazon_also_bought_title_4
     amazon_also_bought_asin_5 amazon_also_bought_author_5 amazon_also_bought_price_5
     amazon_also_bought_rating_5 amazon_also_bought_star_count_5 amazon_also_bought_title_5
     amazon_also_bought_asin_6 amazon_also_bought_author_6 amazon_also_bought_price_6
     amazon_also_bought_rating_6 amazon_also_bought_star_count_6 amazon_also_bought_title_6
     amazon_bought_after_viewing_asin_1 amazon_bought_after_viewing_author_1 amazon_bought_after_viewing_price_1
     amazon_bought_after_viewing_rating_1 amazon_bought_after_viewing_star_count_1 amazon_bought_after_viewing_title_1
     amazon_bought_after_viewing_asin_2 amazon_bought_after_viewing_author_2 amazon_bought_after_viewing_price_2
     amazon_bought_after_viewing_rating_2 amazon_bought_after_viewing_star_count_2 amazon_bought_after_viewing_title_2
     amazon_bought_after_viewing_asin_3 amazon_bought_after_viewing_author_3 amazon_bought_after_viewing_price_3
     amazon_bought_after_viewing_rating_3 amazon_bought_after_viewing_star_count_3 amazon_bought_after_viewing_title_3
     amazon_bought_after_viewing_asin_4 amazon_bought_after_viewing_author_4 amazon_bought_after_viewing_price_4
     amazon_bought_after_viewing_rating_4 amazon_bought_after_viewing_star_count_4 amazon_bought_after_viewing_title_4
     amazon_frequently_bought_together_format_1 amazon_frequently_bought_together_price_1
     amazon_frequently_bought_together_title_1 amazon_frequently_bought_together_format_2
     amazon_frequently_bought_together_price_2 amazon_frequently_bought_together_title_2
     amazon_similar_item_category_tree_1 amazon_similar_item_category_tree_2
     amazon_similar_item_category_tree_3 amazon_similar_item_category_tree_4
     amazon_similar_item_category_tree_5 amazon_similar_item_category_tree_6
     amazon_similar_item_category_tree_7 amazon_similar_item_category_tree_8
     amazon_similar_item_category_tree_9 amazon_similar_item_category_tree_10
     amazon_similar_item_category_tree_11 amazon_similar_item_category_tree_12
     amazon_similar_item_category_tree_13 amazon_similar_item_category_tree_14
     amazon_similar_item_category_tree_15
     amazon_similar_item_category_external_id_1 amazon_similar_item_category_external_id_2
     amazon_similar_item_category_external_id_3 amazon_similar_item_category_external_id_4
     amazon_similar_item_category_external_id_5 amazon_similar_item_category_external_id_6
     amazon_similar_item_category_external_id_7 amazon_similar_item_category_external_id_8
     amazon_similar_item_category_external_id_9 amazon_similar_item_category_external_id_10
     amazon_similar_item_category_external_id_11 amazon_similar_item_category_external_id_12
     amazon_similar_item_category_external_id_13 amazon_similar_item_category_external_id_14
     amazon_similar_item_category_external_id_15
     bn_also_bought_ean_1 bn_also_bought_author_1 bn_also_bought_price_1 bn_also_bought_title_1
     bn_also_bought_ean_2 bn_also_bought_author_2 bn_also_bought_price_2 bn_also_bought_title_2
     bn_also_bought_ean_3 bn_also_bought_author_3 bn_also_bought_price_3 bn_also_bought_title_3
     bn_also_bought_ean_4 bn_also_bought_author_4 bn_also_bought_price_4 bn_also_bought_title_4
     bn_also_bought_ean_5 bn_also_bought_author_5 bn_also_bought_price_5 bn_also_bought_title_5
     bn_also_bought_ean_6 bn_also_bought_author_6 bn_also_bought_price_6 bn_also_bought_title_6
     related_formats_kindle_price related_formats_kindle_asin related_formats_mass_market_paperback_price
     related_formats_mass_market_paperback_asin related_formats_nook_price related_formats_nook_ean
     related_formats_hardcover_price related_formats_hardcover_asin related_formats_paperback_price
     related_formats_paperback_asin
     overall_author_rank sub_category1_author_rank sub_category2_author_rank sub_category3_author_rank sub_category4_author_rank
     sub_category1_author_rank_id sub_category2_author_rank_id sub_category3_author_rank_id sub_category4_author_rank_id
     sub_category1_author_rank_id_fallback sub_category2_author_rank_id_fallback
     sub_category3_author_rank_id_fallback sub_category4_author_rank_id_fallback]
end
-
-
1
# Column order for the book_version_categories COPY statement; must match the
# row layout produced by process_record_for_book_version_categories.
def book_version_categories_columns
  ['warehouse_book_version_id', 'category_name', 'warehouse_category_id', 'warehouse_region_id']
end
-
-
1
# Fans out NightlyStatsEtl jobs over every ingested warehouse book version for
# the given date. Each job receives an id range (slice boundaries) plus flags
# for which ETL steps to run.
def self.process_date(date_or_date_string, warehouse_date_id, etl_product_stats, etl_book_version_categories, etl_category_stats)
  date_string = date_or_date_string.is_a?(Date) ? date_or_date_string.to_s : date_or_date_string
  # book_version_categories is rebuilt from scratch when that ETL step is enabled.
  BookVersionCategory.connection.execute("truncate #{BookVersionCategory.table_name}") if etl_book_version_categories

  warehouse_region_com_id = WarehouseRegion.com.id
  warehouse_region_co_uk_id = WarehouseRegion.couk.id
  MongoUtilities.initialize_daily_scrape_field_counts(date_string)

  collection = MongoUtilities.daily_collection(:stats, date_string.to_date)

  # Select batch-boundary ids: the first row, the last row, and every row
  # whose row number is ≡ 0 or ≡ BATCH_SIZE-1 modulo BATCH_SIZE (string order).
  ids = WarehouseBookVersion.connection.execute("SELECT t.id FROM (SELECT id::varchar(255), row_number() OVER(ORDER BY id::varchar(255) ASC) AS row_asc FROM warehouse_book_versions where status = 'ingested') t WHERE t.row_asc % #{BATCH_SIZE} = 0 OR t.row_asc % #{BATCH_SIZE} = #{BATCH_SIZE - 1} OR t.row_asc = 1").values.flatten
  ids += WarehouseBookVersion.connection.execute("SELECT id::varchar(255) FROM warehouse_book_versions where status = 'ingested' order by id::varchar(255) DESC limit 1").values.flatten
  # Pair consecutive boundary ids into [first_id, last_id] job ranges.
  params = ids.uniq.each_slice(2).collect do |slice|
    [collection.name, slice.first, slice.last, warehouse_region_com_id, warehouse_region_co_uk_id, warehouse_date_id, date_string, etl_product_stats, etl_book_version_categories, etl_category_stats]
  end.compact

  Sidekiq::Client.push_bulk 'class' => EtlWorkers::NightlyStatsEtl, 'args' => params if params.present?
end
-
end
-
-
1
# ETL worker that bulk-loads one batch of Mongo list-stat documents into the
# warehouse_list_stats Postgres table via the COPY protocol.
class NightlyListStatEtl
  include Sidekiq::Worker
  sidekiq_options queue: :etl
  # Number of Mongo documents handled per job.
  BATCH_SIZE = 5000

  # Copies batch `batch_number` of `collection_name` into warehouse_list_stats.
  def perform(collection_name, warehouse_date_id, batch_number)
    collection = $mongodb.collection(collection_name)
    # Column order must match the `stats` array built below.
    warehouse_list_stat_columns = %w[created_at updated_at mongo_id
      title name rank days_in_top_100 price author warehouse_trend_id warehouse_book_version_id
      warehouse_category_id warehouse_date_id asin isbn bn_id itunes_id list_type]
    timestamp = Time.current.utc

    # Lookup tables resolved up front so the COPY loop does no SQL.
    warehouse_trend_id_by_name = WarehouseTrend.value_of(:name, :id).each_with_object({}) do |name_and_id, hash|
      hash[name_and_id[0]] = name_and_id[1]
    end
    # Collect every asin/isbn in this batch to resolve book version ids by tld.
    asins = collection.find.skip(batch_number * BATCH_SIZE).limit(BATCH_SIZE).collect {|record| record['stats'].collect {|stat| stat['asin']}}.flatten.compact
    isbn13s = collection.find.skip(batch_number * BATCH_SIZE).limit(BATCH_SIZE).collect {|record| record['stats'].collect {|stat| stat['isbn']}}.flatten.compact
    warehouse_book_version_id_by_tld_and_asin = WarehouseBookVersion.where(asin: asins).value_of(:tld, :asin, :id).each_with_object({'.com' => {}, '.co.uk' => {}}) do |asin_and_id, hash|
      hash[asin_and_id[0]][asin_and_id[1]] = asin_and_id[2]
    end
    warehouse_book_version_id_by_tld_and_isbn13 = WarehouseBookVersion.where(isbn13: isbn13s).value_of(:tld, :isbn13, :id).each_with_object({'.com' => {}, '.co.uk' => {}}) do |isbn13_and_id, hash|
      hash[isbn13_and_id[0]][isbn13_and_id[1]] = isbn13_and_id[2]
    end

    # TODO: mongo_id is not indexed, it needs to be before this query will return in a reasonable time
    # record = collection.find({}).skip(batch_number * BATCH_SIZE).limit(1).first
    # return if WarehouseListStat.where(warehouse_date_id: warehouse_date_id, mongo_id: record['mongo_id']).exists?

    # Start the COPY; rows are streamed with put_copy_data below.
    connection = WarehouseListStat.connection.raw_connection
    sql = "COPY warehouse_list_stats (#{warehouse_list_stat_columns.join(',')}) FROM STDIN CSV NULL '' QUOTE '\"' ESCAPE '\\'"
    connection.exec(sql)

    errmsg = nil
    begin
      collection.find({}, timeout: false) do |cursor|
        cursor.skip(batch_number * BATCH_SIZE).limit(BATCH_SIZE).each do |record|
          record['stats'].each do |stat|
            # One CSV row per stat; falls back from asin to isbn when resolving
            # the warehouse_book_version_id.
            stats = [timestamp, timestamp, Utilities.prepare_string_for_copy(record['_id']), Utilities.prepare_string_for_copy(stat['title']),
                     Utilities.prepare_string_for_copy(record['name']), Utilities.prepare_integer_for_copy(stat['rank']),
                     Utilities.prepare_integer_for_copy(stat['days_in_top_100']), Utilities.prepare_integer_for_copy(stat['price']),
                     Utilities.prepare_string_for_copy(stat['author']), warehouse_trend_id_by_name[stat['trend']],
                     (warehouse_book_version_id_by_tld_and_asin[record['tld']][stat['asin']] || warehouse_book_version_id_by_tld_and_isbn13[record['tld']][stat['isbn']]),
                     record['warehouse_category_id'], warehouse_date_id, Utilities.prepare_string_for_copy(stat['asin']),
                     Utilities.prepare_string_for_copy(stat['isbn']), Utilities.prepare_string_for_copy(stat['bn_id']),
                     Utilities.prepare_string_for_copy(record['itunes_id']), Utilities.prepare_string_for_copy(record['list_type'])]

            connection.put_copy_data("#{stats.join(',')}\n")
          end
        end
      end
    # NOTE(review): `rescue Errno` follows the pg gem's COPY example, but Errno
    # is a namespace module and SystemCallError subclasses do not appear to
    # include it, so this rescue may never match — confirm and consider
    # `rescue SystemCallError`.
    rescue Errno => err
      errmsg = '%s while reading copy data: %s' % [err.class.name, err.message]
    end

    # Ends the COPY, passing errmsg to abort it when streaming failed.
    PostgresUtilities.finalize_copy_command connection, errmsg
  end

  # Enqueues one job per BATCH_SIZE documents for each of the three daily
  # list-stat collections (Amazon, B&N, Apple).
  def self.process_date(date_or_date_string, warehouse_date_id)
    date = date_or_date_string.is_a?(Date) ? date_or_date_string : date_or_date_string.to_date
    collections = [MongoUtilities.daily_collection(:amazon_list_stats, date),
                   MongoUtilities.daily_collection(:bn_list_stats, date),
                   MongoUtilities.daily_collection(:apple_list_stats, date)]

    params = collections.inject([]) do |memo, collection|
      memo + (collection.count / BATCH_SIZE.to_f).ceil.times.collect do |batch_number|
        [collection.name, warehouse_date_id, batch_number]
      end
    end

    Sidekiq::Client.push_bulk 'class' => EtlWorkers::NightlyListStatEtl, 'args' => params
  end
end
-
-
1
# Persists newly-scraped Amazon book categories (collected in Redis by the
# scrapers) into warehouse_categories, then queues jobs to fill in each new
# category's parent.
class BookCategoryEtl
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  def perform
    columns = %w[name category_id tld depth status category_type]

    # Redis set members are [name, category_id, tld] triples.
    scraped = RedisUtilities.get_set_members :scraped_categories
    known = WarehouseCategory.amazon.where(name: scraped.collect { |data| data[0] }).value_of(:name, :category_id, :tld)

    # Keep only triples we have not stored yet whose name passes the
    # acceptability filter, shaped into batch_insert rows.
    rows = (scraped - known).select do |name, _, _|
      AmazonBestSellersPage.is_acceptable_amazon_category_name? name
    end.collect do |name, category_id, tld|
      [name, category_id, tld, Utilities.get_depth_from_category_name(name), 'alternative', 'AmazonBookCategory']
    end

    return if rows.blank?

    dw_ids = WarehouseCategory.batch_insert columns, rows
    Sidekiq::Client.push_bulk('class' => WarehouseCategoryWorkers::FillInWarehouseCategoryParent,
                              'args' => dw_ids.collect(&method(:Array)))
  end
end
-
-
1
# ETL worker that bulk-loads one batch of Mongo book-version-exception
# documents into the book_version_exceptions Postgres table via COPY.
class BookVersionExceptionEtl
  include Sidekiq::Worker
  sidekiq_options queue: :etl
  # Number of Mongo documents handled per job.
  BATCH_SIZE = 100000

  # Streams batch `batch_number` of `collection_name` into Postgres. Each
  # exception flag is coerced to a strict boolean with !!.
  def perform(collection_name, batch_number, warehouse_date_id)
    # Column order must match the `stats` array built below.
    columns = %w[created_at updated_at warehouse_date_id
      warehouse_book_version_id amazon_not_found_in_search amazon_no_image amazon_no_buy_button amazon_no_price
      bn_not_found_in_search no_isbn amazon_ambiguous_result apple_invalid]
    timestamp = Time.current.utc
    connection = BookVersionException.connection.raw_connection
    sql = "COPY book_version_exceptions (#{columns.join(',')}) FROM STDIN CSV NULL '' QUOTE '\"' ESCAPE '\\'"
    connection.exec(sql)

    errmsg = nil
    collection = $mongodb.collection collection_name
    begin
      collection.find({}, timeout: false) do |cursor|
        cursor.skip(batch_number * BATCH_SIZE).limit(BATCH_SIZE).each do |record|
          # Mongo _id doubles as the warehouse_book_version_id here.
          stats = [timestamp, timestamp, warehouse_date_id, record['_id'], !!record['amazon_not_found_in_search'],
                   !!record['amazon_no_image'], !!record['amazon_no_buy_button'], !!record['amazon_no_price'],
                   !!record['bn_not_found_in_search'], !!record['no_isbn'], !!record['amazon_ambiguous_result'], !!record['apple_invalid']]

          connection.put_copy_data("#{stats.join(',')}\n")
        end
      end
    # NOTE(review): `rescue Errno` mirrors the pg gem's COPY example, but
    # Errno is a namespace module and SystemCallError subclasses do not appear
    # to include it — this rescue may never match; confirm.
    rescue Errno => err
      errmsg = '%s while reading copy data: %s' % [err.class.name, err.message]
    end

    PostgresUtilities.finalize_copy_command connection, errmsg
  end

  # Enqueues one job per BATCH_SIZE documents in the day's exception collection.
  def self.process_date(date_or_date_string)
    date = date_or_date_string.is_a?(Date) ? date_or_date_string : date_or_date_string.to_date
    warehouse_date_id = WarehouseDate.where(date: date).first_or_create.id
    collection = MongoUtilities.daily_collection(:book_version_exceptions, date)
    params = (collection.count / BATCH_SIZE.to_f).ceil.times.collect do |batch_number|
      [collection.name, batch_number, warehouse_date_id]
    end

    Sidekiq::Client.push_bulk('class' => EtlWorkers::BookVersionExceptionEtl, 'args' => params) if params.present?
  end
end
-
-
1
# Merges the per-batch category stat fragments written to Mongo during the
# nightly stats ETL into a single set of per-category aggregates, then bulk
# inserts them into category_stats.
class CategoryStatsEtl
  include Sidekiq::Worker
  sidekiq_options queue: :etl

  # date_string - the scrape date (string) whose fragments should be merged.
  def perform(date_string)
    date = date_string.to_date
    warehouse_date_id = WarehouseDate.find_by(date: date).id
    category_stats_fragments = CategoryStatsCollection.new(date).find.to_a
    # Seed the accumulator with the last fragment; the remaining fragments are
    # folded into it below. NOTE(review): assumes at least one fragment exists
    # for the date — `pop` on an empty array would make this raise; confirm.
    category_stats = category_stats_fragments.pop.except('_id')
    category_stats_fragments.each do |stats|
      stats.each_pair do |category_key, hash|
        next if category_key == '_id'

        if category_stats[category_key].present?
          # (Removed a no-op line that read category_stats[category_key]['best_rank']
          # without using the result.)
          if hash['best_rank'] < category_stats[category_key]['best_rank']
            category_stats[category_key]['best_rank'] = hash['best_rank']
            category_stats[category_key]['best_rank_book_version_id'] = hash['best_rank_book_version_id']
          end
          if hash['worst_rank'] > category_stats[category_key]['worst_rank']
            category_stats[category_key]['worst_rank'] = hash['worst_rank']
            category_stats[category_key]['worst_rank_book_version_id'] = hash['worst_rank_book_version_id']
          end
          category_stats[category_key]['ranks'] += hash['ranks']
        else
          category_stats[category_key] = hash
        end
      end
    end

    columns = %w[best_rank worst_rank book_version_count mean_rank median_rank category_name best_rank_book_version_id
                 worst_rank_book_version_id warehouse_region_id warehouse_date_id warehouse_category_id]

    attributes = category_stats.collect do |_, stats|
      [stats['best_rank'], stats['worst_rank'], stats['ranks'].count, stats['ranks'].mean, stats['ranks'].median, stats['category_name'],
       stats['best_rank_book_version_id'], stats['worst_rank_book_version_id'], stats['warehouse_region_id'], warehouse_date_id,
       stats['warehouse_category_id']]
    end

    CategoryStat.batch_insert columns, attributes
  end
end
-
end
-
1
module MaintenanceWorkers
-
1
# Runs a verbose ANALYZE over the whole Postgres database so the planner's
# statistics stay fresh after the nightly ETL, logging start/finish under
# the 'dw' tag.
class RunPostgresAnalyze
  include Sidekiq::Worker
  sidekiq_options queue: :high

  def perform
    logger = Rails.logger
    logger.tagged('dw') { logger.info "Starting postgres ANALYZE process: #{Time.current}" }

    connection = ActiveRecord::Base.connection
    # Run outside the query cache so the statement always hits the database.
    connection.uncached { connection.execute('ANALYZE VERBOSE;') }

    logger.tagged('dw') { logger.info "Completed postgres ANALYZE process: #{Time.current}" }
  end
end
-
-
1
# Clears sleeping and phantom Sidekiq workers from the given queues via
# RedisUtilities.
class ClearStaleWorkers
  include Sidekiq::Worker
  sidekiq_options queue: :high

  # Workers idle beyond this many seconds are treated as stale.
  STALE_AFTER_SECONDS = 1200

  # queue_names - queues to sweep; defaults to the scraper queue.
  def perform(queue_names = ['scraper'])
    RedisUtilities.clear_sleeping_and_phantom_workers(queue_names, STALE_AFTER_SECONDS)
  end
end
-
end
-
1
module MergePurgeJobs
-
1
# Downloads a CSV from `file_location`, stores a local copy under tmp/, and
# hands it to MergePurge for reconciliation on behalf of the given user.
class ReconcileCSV
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # file_location - path or URL of the CSV to reconcile.
  # user_email - email of the user the reconciliation is attributed to.
  def perform(file_location, user_email)
    Rails.logger.tagged('ingestions') {Rails.logger.info 'Sidekiq worker Reconcile CSV started'}
    csv = File.new("#{Rails.root}/tmp/#{File.basename(file_location)}", 'wb')
    # SECURITY NOTE(review): Kernel#open on an externally supplied string will
    # spawn a subprocess if the value starts with '|'; if file_location can
    # come from untrusted input, prefer File.open/URI.open explicitly.
    csv << open(file_location).read
    csv.flush

    user = User.find_by email: user_email
    MergePurge.reconcile_csv(csv, user)
    # File.delete accepts the File object via #to_path; the handle itself is
    # never explicitly closed — presumably reclaimed by GC. TODO confirm.
    File.delete(csv)
  end
end
-
-
1
# Pulls the Vook production title list (ISBN/ASIN pairs) from Salesforce and
# reconciles it, without metadata, for the given user.
class ReconcileVookProductionList
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  def perform(user_email)
    Rails.logger.tagged('ingestions') {Rails.logger.info 'Sidekiq worker Reconcile CSV started'}
    # SECURITY NOTE(review): Salesforce credentials are hard-coded here; they
    # should live in encrypted configuration / environment variables.
    client = Restforce.new :username => 'system@vook.com',
                           :password => 'v0oknation',
                           :security_token => 'kt6KNlNaQluhkbcstMG6UGLn',
                           :client_id => '3MVG9ytVT1SanXDk2W.y22B_aWDxjFj_QyQVOrCscHWaHOCaR8I03EpC6OqBzekjbhNtyO99NOYiBj9ZPEtRS',
                           :client_secret => '7035778425829838587'

    # Titles distributed at least 6 days ago to Amazon (KDP or Kindle),
    # excluding two specific accounts, abandoned titles and in-revision titles.
    array = []
    client.query("SELECT ISBN__c, ASIN__c FROM Title__c WHERE Abandoned__c = False AND (AmazonKDP__c = True or AmazonKindle__c = True)
      AND Account__c NOT IN ('0016000000uvL4Z', '0016000000o4LsM') AND DistributionCompletedDate__c <= #{(Date.today-6.days)} AND RevisingTitle__c = ''").each do |data|
      array << [data['ISBN__c'], data['ASIN__c']]
    end

    user = User.find_by email: user_email
    MergePurge.reconcile_asin_and_isbn13_array_no_metadata(array, user)
  end
end
-
-
-
end
-
1
module MergePurgeWorkers
-
1
# Fetches the weekly Random House ONIX 2.1 delta feeds (physical, eBook
# agency/wholesale, international physical) over FTP, unzips each, and runs
# MergePurge reconciliation against the RH Inc and RH DE accounts.
class ProcessOnixDeltas
  include Sidekiq::Worker
  sidekiq_options queue: :high

  def perform
    Rails.logger.tagged('onix') {Rails.logger.info 'Sidekiq worker Reconcile ONIX XML started'}

    # Feeds are published on Sundays; target the most recent one.
    most_recent_sunday = Date.current.wday == 0 ? Date.current : Date.current - Date.current.wday
    rhinc_user = User.find_by email: 'rhincactive@booklr.com'
    rhde_user = User.find_by email: 'rhde@booklr.com'

    # Physical Delta
    # NOTE(review): FTP credentials are hard-coded throughout this worker;
    # consider moving them to configuration.
    xml_file_name = nil
    Net::FTP.open('ftp.randomhouse.com', 'rhcat', 'rhcat') do |ftp|
      ftp.passive = true
      ftp.chdir '/onix_21/delta'
      # Only download when the newest listed file carries this week's YYYYMMDD.
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", 'w')
        ftp.get(file_name, file)
        xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end
    end

    # main physical list (with no export edition titles) goes only on the rhinc account
    # NOTE(review): if no matching file was found, xml_file_name is still nil
    # and this File.open raises — presumably intentional fail-fast; confirm.
    xml = File.open("#{Rails.root}/tmp/#{xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhinc_user)
    File.delete(xml)

    # eBook Deltas
    agency_xml_file_name = nil
    wholesale_xml_file_name = nil
    Net::FTP.open('ftp.randomhouse.com', 'ebookgreen', 'rand0mgr') do |ftp|
      ftp.passive = true

      # Agency
      ftp.chdir '/onix_21/delta/Agency'
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", "w")
        ftp.get(file_name, file)
        agency_xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end

      # Wholesale
      ftp.chdir '/onix_21/delta/Wholesale'
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", 'w')
        ftp.get(file_name, file)
        wholesale_xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end
    end

    # Ebook deltas go on the rhinc and rhde accounts
    xml = File.open("#{Rails.root}/tmp/#{agency_xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhinc_user)

    # reopen file for RHDE processing
    xml = File.open("#{Rails.root}/tmp/#{agency_xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhde_user)
    File.delete(xml)

    xml = File.open("#{Rails.root}/tmp/#{wholesale_xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhinc_user)

    # reopen file for RHDE processing
    xml = File.open("#{Rails.root}/tmp/#{wholesale_xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhde_user)
    File.delete(xml)

    # International Physical Delta only goes on the rhde account (this is missing all Canada exclusives included in
    # the main physical feed but includes the export edition titles)
    xml_file_name = nil
    Net::FTP.open('ftp.randomhouse.com', 'rhintcat', 'RH1ntCat') do |ftp|
      ftp.passive = true
      ftp.chdir '/onix_21/delta'
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub("-",""))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", "w")
        ftp.get(file_name, file)
        xml_file_name = EnterpriseReports.unzip_file(file,"#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end
    end

    xml = File.open("#{Rails.root}/tmp/#{xml_file_name}")
    MergePurge.reconcile_onix_xml(xml, rhde_user)
    File.delete(xml)

    Rails.logger.tagged('onix') {Rails.logger.info 'Reconcile ONIX XML completed processing all 3 files'}
  end
end
-
-
1
# Fetches the weekly Random House *full* ONIX 2.1 feeds (physical plus eBook
# agency/wholesale) over FTP, unzips each, and updates book metadata via
# MergePurge.update_metadata for the RH Inc active account.
class UpdateMetadataFromOnix
  include Sidekiq::Worker
  sidekiq_options queue: :high

  def perform
    Rails.logger.tagged('onix') {Rails.logger.info 'Sidekiq worker Update Metadata from full ONIX XML started'}

    # Feeds are published on Sundays; target the most recent one.
    most_recent_sunday = Date.current.wday == 0 ? Date.current : Date.current - Date.current.wday

    # Physical Delta
    # NOTE(review): FTP credentials are hard-coded; consider configuration.
    xml_file_name = nil
    Net::FTP.open('ftp.randomhouse.com', 'rhcat', 'rhcat') do |ftp|
      ftp.passive = true
      ftp.chdir '/onix_21/full'
      # Only download when the newest listed file carries this week's YYYYMMDD.
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", 'w')
        ftp.get(file_name, file)
        xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end
    end

    # NOTE(review): if no matching file was found, xml_file_name is nil and
    # this File.open raises — presumably intentional fail-fast; confirm.
    xml = File.open("#{Rails.root}/tmp/#{xml_file_name}")
    user = User.find_by email: 'rhincactive@booklr.com'
    MergePurge.update_metadata(xml, user)
    File.delete(xml)

    # eBook Deltas
    agency_xml_file_name = nil
    wholesale_xml_file_name = nil
    Net::FTP.open('ftp.randomhouse.com', 'ebookgreen', 'rand0mgr') do |ftp|
      ftp.passive = true

      # Agency
      ftp.chdir '/onix_21/full/Agency'
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", 'w')
        ftp.get(file_name, file)
        agency_xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end

      # Wholesale
      ftp.chdir '/onix_21/full/Wholesale'
      if ftp.nlst.last.include?((most_recent_sunday).to_s.gsub('-', ''))
        file_name = ftp.nlst.last
        file = File.open("#{Rails.root}/tmp/#{file_name}", 'w')
        ftp.get(file_name, file)
        wholesale_xml_file_name = EnterpriseReports.unzip_file(file, "#{Rails.root}/tmp/").first.to_s
        File.delete(file)
      end
    end

    xml = File.open("#{Rails.root}/tmp/#{agency_xml_file_name}")
    MergePurge.update_metadata(xml, user)
    File.delete(xml)

    xml = File.open("#{Rails.root}/tmp/#{wholesale_xml_file_name}")
    MergePurge.update_metadata(xml, user)
    File.delete(xml)

    Rails.logger.tagged('onix') {Rails.logger.info 'Sidekiq worker Update Metadata from full ONIX XML completed processing all 3 files'}
  end
end
-
end
-
1
module MongoBookCategoryWorkers
-
1
# Scrapes one Amazon best-sellers category page and records the category's
# state (canonical / alternative / deleted) in the AmazonCategoryCollection,
# fanning out scraper jobs for any subcategories found.
class AmazonCategoryScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # category_id   - Amazon category id (nil is tolerated for root listings).
  # category_name - the name we currently associate with category_id.
  # tld           - Amazon marketplace tld.
  # retry_count   - captcha retry counter; the job requeues itself up to 5 times.
  def perform(category_id, category_name, tld, retry_count = 0)
    ProxyUtilities.proxy_setup :amazon

    collection = AmazonCategoryCollection.new
    page = AmazonBestSellersPage.by_category_id_and_tld_and_base_category_and_page_number category_id, tld, ScraperUtilities.base_category_from_category_name(category_name), 1
    if handle_captcha(page, 60, category_id, category_name, tld, retry_count)
      category_name_from_page = page.scrape_category_name
      if AmazonBestSellersPage.is_acceptable_amazon_category_name? category_name_from_page
        if category_id.nil? || category_name_from_page == category_name
          # Name on the page matches expectation: record as canonical and
          # recurse into subcategories.
          collection.add_category_details category_id, category_name, tld, :canonical

          # Fix: scrape the subcategory list once (the original called
          # page.scrape_subcategories twice — once to iterate and once for
          # the trailing present? check — doing the scrape work twice).
          subcategories = page.scrape_subcategories
          if subcategories.present?
            subcategories.each do |subcategory_details|
              args = {category_type: 'AmazonBookCategory', category_id: subcategory_details[:category_id],
                      name: subcategory_details[:category_name], status: :alternative, tld: tld}
              Sidekiq::Client.push 'class' => 'WarehouseCategoryWorkers::CreateAmazon', 'queue' => 'high', 'args' => [args]
              MongoBookCategoryWorkers::AmazonCategoryScraper.perform_async subcategory_details[:category_id], subcategory_details[:category_name], tld
            end
          end
        else
          # Page reports a different name: the scraped name becomes canonical
          # and the name we had becomes an alternative.
          collection.add_category_details category_id, category_name_from_page, tld, :canonical
          collection.add_category_details category_id, category_name, tld, :alternative
          args = {category_type: 'AmazonBookCategory', category_id: category_id, name: category_name_from_page,
                  status: :canonical, tld: tld}
          Sidekiq::Client.push 'class' => 'WarehouseCategoryWorkers::CreateAmazon', 'queue' => 'high', 'args' => [args]
        end
      else
        # The page no longer shows an acceptable category name: mark deleted.
        collection.add_category_details category_id, category_name, tld, :deleted
      end
    else
      # Captcha was not cleared: retry a bounded number of times, then mark
      # the category deleted rather than retrying forever.
      if retry_count < 5
        self.class.perform_async category_id, category_name, tld, retry_count + 1
      else
        collection.add_category_details category_id, category_name, tld, :deleted
      end
    end
  end
end
-
end
-
1
module MongoBookVersionExceptionWorkers
-
1
# Checks whether a book version's Amazon product page has gone away and, when
# the page returns a 404, flags the book version as :page_not_found.
class DiscoverAmazon404s
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  def perform(warehouse_book_version_id, asin, tld)
    ProxyUtilities.proxy_setup :amazon
    product_page = AmazonProductPage.by_asin_and_tld asin, tld
    return unless handle_captcha(product_page, 60, warehouse_book_version_id, asin, tld)
    return unless product_page.response_code == '404'

    BookVersionStatusCollection.new.set_book_version_status warehouse_book_version_id, :page_not_found
  end
end
-
-
1
# Validates a book version against Amazon search results: records
# not-found/ambiguous exceptions in Mongo, kicks off a product-page validation
# on success, and updates the book version's status (unless already ingested).
class ValidateAmazonSearch
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # isbn_or_asin - identifier used for the search.
  # url_hints - data used to disambiguate/match the right search result.
  def perform(collection_name, warehouse_book_version_id, isbn_or_asin, asin, tld, status, url_hints)
    ProxyUtilities.proxy_setup :amazon
    search_page = AmazonSearchPage.by_isbn_or_asin_and_tld isbn_or_asin, tld
    if handle_captcha(search_page, 60, collection_name, warehouse_book_version_id, isbn_or_asin, asin, tld, status, url_hints)
      exceptions = {amazon_not_found_in_search: Validations.amazon_not_found_in_search?(search_page),
                    amazon_ambiguous_result: Validations.amazon_ambiguous_search_results?(search_page, url_hints)}

      if exceptions[:amazon_not_found_in_search] || exceptions[:amazon_ambiguous_result]
        new_status = :invalid_on_amazon
      else
        new_status = :validated
        # The search looked clean — follow up with a product-page validation
        # using the matched result URL.
        MongoBookVersionExceptionWorkers::ValidateAmazonProductPage.perform_async collection_name, warehouse_book_version_id, asin, tld, search_page.matching_url_from_search_results(url_hints)
      end

      MongoUtilities.add_exception_to_collection $mongodb.collection(collection_name), warehouse_book_version_id, exceptions
      # Ingested book versions keep their status regardless of the outcome.
      BookVersionStatusCollection.new.set_book_version_status warehouse_book_version_id, new_status unless status.to_s == 'ingested'
    end
  end
end
-
-
1
# Validates a book version's Amazon product page (price, image, buy button)
# and records the resulting exception flags in Mongo.
class ValidateAmazonProductPage
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Loads the page by ASIN when one is present, otherwise via the URL matched
  # from the earlier search validation.
  def perform(collection_name, warehouse_book_version_id, asin, tld, matching_search_url)
    ProxyUtilities.proxy_setup :amazon

    product_page = if asin.present?
                     AmazonProductPage.by_asin_and_tld(asin, tld)
                   else
                     AmazonProductPage.new(matching_search_url)
                   end
    return unless handle_captcha(product_page, 60, collection_name, warehouse_book_version_id, asin, tld, matching_search_url)

    findings = {amazon_no_price: Validations.amazon_no_price?(product_page),
                amazon_no_image: Validations.amazon_no_image?(product_page),
                amazon_no_buy_button: Validations.amazon_no_buy_button?(product_page)}
    MongoUtilities.add_exception_to_collection $mongodb.collection(collection_name), warehouse_book_version_id, findings
  end
end
-
-
1
# Validates that a book version can be found on Barnes & Noble. Titles with no
# ISBN are immediately flagged as not found; otherwise a B&N search is scraped.
class ValidateBarnesAndNoble
  include Sidekiq::Worker
  sidekiq_options queue: :bn_stat_scraper

  def perform(collection_name, warehouse_book_version_id, isbn_or_asin)
    exception = {no_isbn: Validations.no_isbn?(isbn_or_asin)}
    if exception[:no_isbn]
      # B&N lookup is by ISBN, so a missing ISBN implies not found in search.
      exception[:bn_not_found_in_search] = true
    else
      ProxyUtilities.proxy_setup :barnes_and_noble
      page = BnSearchPage.by_isbn isbn_or_asin

      # sleep for 60 seconds and requeue the job if you get throttled
      if bn_captcha_sleepy_time(page, 60, collection_name, warehouse_book_version_id, isbn_or_asin)
        exception[:bn_not_found_in_search] = Validations.bn_not_found_in_search?(page)
      end
    end

    MongoUtilities.add_exception_to_collection $mongodb.collection(collection_name), warehouse_book_version_id, exception
  end
end
-
-
1
class ValidateItunes
  include Sidekiq::Worker
  sidekiq_options queue: :scraper

  # Records an apple_invalid exception flag for the book version in Mongo.
  # iTunes Validations: Only validate if the title is ingested, an ebook, has an isbn and is part of rhincactive list
  def perform(collection_name, warehouse_book_version_id, isbn_or_asin)
    collection = $mongodb.collection(collection_name)
    exception_flags = {apple_invalid: Validations.apple_invalid?(isbn_or_asin)}
    MongoUtilities.add_exception_to_collection collection, warehouse_book_version_id, exception_flags
  end
end
-
end
-
1
module MongoListStatWorkers
-
1
# Scrapes one page of an Amazon best-sellers (Top 100) category list, stores
# the ranked stats in Mongo, and queues creation of any ASINs not yet known.
class AmazonTop100
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  def perform(category_id, tld, base_category, page_number, warehouse_category_id, collection_name)
    ProxyUtilities.proxy_setup :amazon
    page = AmazonBestSellersPage.by_category_id_and_tld_and_base_category_and_page_number category_id, tld, base_category, page_number
    if handle_captcha(page, 60, category_id, tld, base_category, page_number, warehouse_category_id, collection_name)
      # Queue up remaining top 100 pages for this category if it's the first page
      (2..page.scrape_number_of_pages).each {|page_num| MongoListStatWorkers::AmazonTop100.perform_async category_id, tld, base_category, page_num, warehouse_category_id, collection_name} if page_number == 1
      mongo_id = "#{warehouse_category_id}-#{page_number}"
      data = {_id: mongo_id, warehouse_category_id: warehouse_category_id, tld: tld, stats: page.best_sellers_stats}
      if data[:stats].blank? || (data[:stats].present? && data[:stats].any? {|stat| stat[:rank].blank?})
        # Incomplete scrape (no stats, or a stat missing its rank): retry this
        # page unless the category legitimately has no best sellers.
        MongoListStatWorkers::AmazonTop100.perform_async category_id, tld, base_category, page_number, warehouse_category_id, collection_name unless page.scrape_no_best_sellers?
      else
        # Bulk-create book versions for ASINs not yet in the all-asin list,
        # then persist the page's stats.
        asins = data[:stats].collect {|stat| stat[:asin]}.uniq
        values = asins.collect {|asin| [{asin: asin, tld: tld, status: :validated_from_top_100s, source: 'amazon-top100'}] unless MongoUtilities.exists_in_all_asin_list?(asin, tld)}.compact
        Sidekiq::Client.push_bulk('class' => 'BookVersionWorkers::Create', 'args' => values, 'queue' => 'background')
        $mongodb.collection(collection_name).insert data
      end
    end
  end
end
-
-
1
# Scrapes one of the two Barnes & Noble Nook bestseller lists and fans out a
# per-title job that fills in the ISBN/BN ID before the stats are persisted.
class BarnesNobleTop100
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :bn_list_stat_scraper

  # Maps the scrape-method symbol (key) to the human-readable list name (value).
  LIST_NAMES = {bn_top_100_nook_books: 'Barnes & Noble Top 100 NOOK Books', bn_nook_book_bestsellers: 'Barnes & Noble NOOK Book Bestsellers'}.freeze

  # list_name must be one of LIST_NAMES' values; raises ArgumentError otherwise.
  def perform(list_name, collection_name)
    raise ArgumentError unless LIST_NAMES.values.include?(list_name)

    # always use tor for the 2 list scrapes
    ProxyUtilities.force_proxy

    # LIST_NAMES.key(list_name) is the symbol of the scrape method defined below.
    stats = send(LIST_NAMES.key(list_name))
    values = stats.collect {|stat| [list_name, stat[:rank], stat[:title], stat[:author], stat[:list_price], stat[:price], stat[:href], collection_name]}
    Sidekiq::Client.push_bulk('class' => MongoListStatWorkers::BarnesNobleFillInIsbnAndBnId, 'args' => values)
  end

  # Scrapes the five result pages of the NOOK Book Bestsellers list and
  # returns an array of stat hashes (nil fields when a selector misses).
  def bn_nook_book_bestsellers
    pages = %w[1 21 41 61 81].collect {|start| HttpHelper.get_bn_list_stat_page_html "http://www.barnesandnoble.com/u/nook-books-bestsellers/379003503?start=#{start}"}
    pages.each_with_object([]) do |page, array|
      page.css('.result').each do |element|
        array << {rank: (element.css('.resultNum').text.squish if element.css('.resultNum').present?),
                  title: (element.css('.title a').first.text.squish if element.css('.title a').present?),
                  author: (element.css('.contributor a').first.text.squish if element.css('.contributor a').present?),
                  list_price: (element.css('.list-price span').first.text.squish.gsub(/\$|\./, '') if element.css('.list-price span').present?), # No longer on page
                  price: (element.css('.pricing.bn-price strong').first.text.squish.gsub(/\$|\./, '') if element.css('.pricing.bn-price strong').present?),
                  href: (element.css('.title a').first.attributes['href'].text if element.css('.title a').present?)}
      end
    end
  end

  # Scrapes the five result pages of the Top 100 NOOK Books list; rank is
  # positional (array.size + 1) since the page does not render one.
  def bn_top_100_nook_books
    pages = %w[1 22 43 64 85].collect {|start| HttpHelper.get_bn_list_stat_page_html "http://www.barnesandnoble.com/ebooks/category.asp?PID=35951&start=#{start}"}
    pages.each_with_object([]) do |page, array|
      page.css('.merch-ebook .ebook-info').each do |element|
        array << {rank: array.size + 1,
                  title: (element.css('h4 a').first.text.squish if element.css('h4 a').present?),
                  author: (element.css('.Contributor a').first.text.squish if element.css('.Contributor a').present?),
                  list_price: (element.css('.memberPriceGroup1 .list-price').first.text.squish.gsub(/\$|\.| List Price/, '') if element.css('.memberPriceGroup1 .list-price').present?),
                  price: (element.css('.memberPriceGroup0 strong').first.text.squish.gsub(/\$|\./, '') if element.css('.memberPriceGroup0 strong').present?),
                  href: (element.css('h4 a').first.attributes['href'].text if element.css('h4 a').present?)}
      end
    end
  end
end
-
-
1
# Looks up the Nook edition of a bestseller entry to fill in its ISBN-13 (or
# BN ID as a fallback), queues creation of the book version when an ISBN is
# found, and writes the completed stat row into Mongo.
class BarnesNobleFillInIsbnAndBnId
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :bn_list_stat_scraper

  def perform(list_name, rank, title, author, list_price, price, url, collection_name)
    # always use tor
    ProxyUtilities.force_proxy

    stats = {rank: rank, title: title, author: author, list_price: list_price, price: price}
    summary_page = HttpHelper.get_html_with_mechanize url, 'Windows IE 6'
    if summary_page.present?
      # Use the summary page directly when its Format detail already says
      # 'ebook'; otherwise follow the nook-book link from the formats/editions box.
      page = if summary_page.css('.product-details').present? && summary_page.css('.product-details').xpath("//li/span[starts-with(., 'Format')]").present? && summary_page.css('.product-details').xpath("//li/span[starts-with(., 'Format')]").first.parent.children.last.text.squish.downcase == 'ebook'
        summary_page
      else
        nook_book_page_url = summary_page.css('.all-formats-editions .format .image a[data-bn-rel]').map {|element| element.attributes['data-bn-rel'].text}.select {|attr| attr.include?('format=nook-book')}.first
        nook_book_page_url.present? ? HttpHelper.get_html_with_mechanize(nook_book_page_url, 'Windows IE 6') : nil
      end

      if page.present? && page.css('.product-details').present?
        if page.css('.product-details').xpath("//li/span[starts-with(., 'ISBN-13')]").present?
          isbn = page.css('.product-details').xpath("//li/span[starts-with(., 'ISBN-13')]").first.parent.children.last.text.squish
          stats[:isbn] = isbn
          # Queue creation of the book version for this newly-seen ISBN.
          Sidekiq::Client.push_bulk('class' => 'BookVersionWorkers::Create', 'args' => [[{isbn13: isbn, tld: '.com', source: 'bn-top100'}]], 'queue' => 'background')
        elsif page.css('.product-details').xpath("//li/span[starts-with(., 'BN ID')]").present?
          stats[:bn_id] = page.css('.product-details').xpath("//li/span[starts-with(., 'BN ID')]").first.parent.children.last.text.squish
        end
      end
    end

    # The stat row is inserted even when no ISBN/BN ID could be resolved.
    $mongodb.collection(collection_name).insert({_id: "#{list_name}-#{rank}", name: list_name, tld: '.com', stats: [stats]})
  end
end
-
-
1
class AppleTopBooksFeed
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :scraper

  # Pulls the Apple top-books RSS feed stats for a category/list type and
  # inserts them into the given Mongo collection.
  def perform(category_id, list_type, warehouse_category_id, collection_name)
    feed = AppleTopBooksRssFeed.by_category_id_and_type(category_id, list_type)
    document = {_id: "#{warehouse_category_id}-#{list_type}",
                warehouse_category_id: warehouse_category_id,
                tld: '.com',
                list_type: list_type,
                stats: feed.stats}

    $mongodb.collection(collection_name).insert document
  end
end
-
end
-
1
module MongoPromotionPageWorkers
-
1
class AmazonKindleDailyDealsScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes the Amazon Kindle Daily Deals landing page, tags each deal with
  # the promotion page name, fans out per-deal search-page scrape jobs, and
  # stores the deals in the daily promotion_pages collection.
  def perform
    ProxyUtilities.proxy_setup :amazon
    page = AmazonKindleDailyDealsPage.new
    return unless handle_captcha(page, 60)

    tagged_deals = page.deals.each {|deal| deal.merge!(promotion_page: 'Amazon Kindle Daily Deals')}

    search_page_args = page.search_page_daily_deals.collect {|deal| [deal[:url], deal[:daily_deal_type]]}
    Sidekiq::Client.push_bulk('class' => MongoPromotionPageWorkers::AmazonKindleDailyDealsSearchPageScraper, 'args' => search_page_args) if search_page_args.present?

    MongoUtilities.daily_collection(:promotion_pages).insert tagged_deals if tagged_deals.present?
  end
end
-
-
1
class AmazonKindleDailyDealsSearchPageScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes an Amazon search page backing a Kindle Daily Deal, tags each
  # result with the promotion page and deal type, and stores the results in
  # the daily promotion_pages collection.
  def perform(url, daily_deal_type)
    ProxyUtilities.proxy_setup :amazon
    search_page = AmazonSearchPage.new(url)
    return unless handle_captcha(search_page, 60, url, daily_deal_type)

    tagged_deals = search_page.search_results.each do |deal|
      deal.merge!(promotion_page: 'Amazon Kindle Daily Deals', daily_deal_type: daily_deal_type)
    end

    MongoUtilities.daily_collection(:promotion_pages).insert tagged_deals if tagged_deals.present?
  end
end
-
-
1
class AmazonKindleMonthlyDealsScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes the Kindle Monthly Deals landing page, tags each deal, fans out a
  # category-page scrape per category URL, and stores the deals in the daily
  # promotion_pages collection.
  def perform
    ProxyUtilities.proxy_setup :amazon
    deals_page = AmazonKindleMonthlyDealsPage.new
    return unless handle_captcha(deals_page, 60)

    tagged_deals = deals_page.deals.each {|deal| deal.merge!(promotion_page: 'Amazon Kindle Monthly Deals $3.99 or Less')}

    category_args = deals_page.category_urls.collect(&method(:Array))
    Sidekiq::Client.push_bulk('class' => MongoPromotionPageWorkers::AmazonKindleMonthlyDealsCategoryPageScraper, 'args' => category_args) if category_args.present?

    MongoUtilities.daily_collection(:promotion_pages).insert tagged_deals if tagged_deals.present?
  end
end
-
-
1
class AmazonKindleMonthlyDealsCategoryPageScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes one category search page of the Kindle Monthly Deals promotion.
  # On the first page it also queues a job for every pagination URL, then
  # stores the tagged results in the daily promotion_pages collection.
  def perform(url)
    ProxyUtilities.proxy_setup :amazon
    category_page = AmazonSearchPage.new(url)
    return unless handle_captcha(category_page, 60, url)

    tagged_deals = category_page.search_results.each do |deal|
      deal.merge!(promotion_page: 'Amazon Kindle Monthly Deals $3.99 or Less',
                  daily_deal_type: "Category Page - #{deal[:category_name]}")
    end

    if category_page.scrape_search_page_number == 1 && category_page.scrape_pagination_urls.present?
      pagination_args = category_page.scrape_pagination_urls.collect(&method(:Array))
      Sidekiq::Client.push_bulk('class' => MongoPromotionPageWorkers::AmazonKindleMonthlyDealsCategoryPageScraper, 'args' => pagination_args) if pagination_args.present?
    end

    MongoUtilities.daily_collection(:promotion_pages).insert tagged_deals if tagged_deals.present?
  end
end
-
-
1
class AmazonKindleSelectPageScraper
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes the Amazon Kindle Select 25 page, tags every title with the
  # promotion page name, and stores them in the daily promotion_pages
  # collection.
  def perform
    ProxyUtilities.proxy_setup :amazon
    select_page = AmazonKindleSelectPage.new
    return unless handle_captcha(select_page, 60)

    tagged_titles = select_page.all_select_titles.each {|deal| deal.merge!(promotion_page: 'Amazon Kindle Select 25')}
    MongoUtilities.daily_collection(:promotion_pages).insert tagged_titles if tagged_titles.present?
  end
end
-
-
1
class BnNookDailyFindPageScraper
  include Sidekiq::Worker
  sidekiq_options queue: :bn_list_stat_scraper

  # Scrapes the B&N Nook Daily Find page (the featured book plus the
  # carousel books), tags everything with the promotion page name, and stores
  # the lot in the daily promotion_pages collection.
  def perform
    ProxyUtilities.proxy_setup :barnes_and_noble
    daily_find_page = BnNookDailyFindPage.new
    return unless bn_captcha_sleepy_time(daily_find_page, 60)

    deals = [daily_find_page.scrape_daily_find_book.merge!(promotion_page: 'Barnes & Noble Nook Daily Find')]
    deals += daily_find_page.scrape_daily_find_carousels.each {|deal| deal.merge!(promotion_page: 'Barnes & Noble Nook Daily Find')}
    MongoUtilities.daily_collection(:promotion_pages).insert deals
  end
end
-
-
1
class BnNookUnder299PageScraper
  include Sidekiq::Worker
  sidekiq_options queue: :bn_list_stat_scraper

  # Scrapes one page of the B&N "Nook Books Under $2.99" list, tags each book
  # with the promotion page name, and stores them in the daily
  # promotion_pages collection.
  def perform(start_number)
    ProxyUtilities.proxy_setup :barnes_and_noble
    list_page = BnNookUnder299.by_start_number(start_number)
    return unless bn_captcha_sleepy_time(list_page, 60, start_number)

    tagged_books = list_page.book_details.each do |deal|
      deal.merge!(promotion_page: 'Barnes & Noble Nook Books Under $2.99')
    end
    MongoUtilities.daily_collection(:promotion_pages).insert tagged_books if tagged_books.present?
  end

  # Queues one scrape job per known pagination start number.
  def self.queue_all_pages
    job_args = BnNookUnder299::START_NUMBERS.collect(&method(:Array))
    Sidekiq::Client.push_bulk('class' => MongoPromotionPageWorkers::BnNookUnder299PageScraper, 'args' => job_args)
  end
end
-
end
-
1
module MongoReportWorkers
-
1
# Builds the daily promotions CSV report from the promotion_pages Mongo
# collection, uploads it to S3, and optionally emails/FTPs it to the client.
class PromotionReport
  include Sidekiq::Worker
  sidekiq_options queue: :reporting

  # date_string   - date of the promotions data to report on (parseable by to_date).
  # deliver_email - when truthy, email the finished report.
  # ftp           - when truthy, FTP the finished report to the client.
  def perform(date_string, deliver_email, ftp)
    date = date_string.to_date

    client_name = :rhinc
    report_file_name = "rh-promo-report-#{date.strftime('%m%d%y')}"
    report_hash = EnterpriseReports.generate_report_hash report_file_name, client_name
    report_csv = EnterpriseReports.open_csv(report_hash)
    report_csv << ['Promo Page', 'Daily Deal Type', 'Rank', 'Title', 'Author', 'Price', 'ASIN', 'ISBN', 'BN ID']

    # timeout: false keeps the server-side cursor alive for the full scan.
    collection = MongoUtilities.daily_collection(:promotion_pages, date)
    collection.find({}, timeout: false) do |cursor|
      cursor.sort(promotion_page: 1, daily_deal_type: 1, rank: 1).each do |record|
        report_csv << [record['promotion_page'], record['daily_deal_type'], record['rank'], record['title'], record['author_name'],
                       ReportUtilities.as_price(record['price']), record['asin'], record['isbn'], record['bn_id']]
      end
    end

    report_csv.flush
    EnterpriseReports.move_to_s3(client_name, report_csv)
    EnterpriseReportsMailer.basic_report(report_hash, AmazeBot.config[:reports][:clients][client_name][:reports][:promotions]).deliver if deliver_email
    EnterpriseReports.ftp_to_client(client_name, report_csv) if ftp
    report_csv.close
  end
end
-
end
-
1
module MongoWorkers
-
1
# Scrapes nightly Amazon product-page stats for a single book version and
# upserts them (tagged with identifying metadata) into the given collection.
class GetAmazonProductPageStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  def perform(collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format)
    ProxyUtilities.proxy_setup :amazon
    page = AmazonProductPage.by_asin_and_tld asin, tld
    if handle_captcha(page, 60, collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format)
      stats = StatsScraper.get_amazon_product_page_stats page

      # NOTE: if this instance ever crashes, its port will probably not be consumed again when it restarts which means we'll be doing 2x the threads on
      # one other port. If we want to avoid this we can rescue all errors here and push the soon to be released port back to the front of the list

      #Get categories from page and add them to the category set in redis in case a new one exists
      scraped_categories = get_scraped_category_data_from_stats stats, tld
      RedisUtilities.add_to_set :scraped_categories, scraped_categories if scraped_categories.present?

      $mongodb.collection(collection_name).update({_id: mongo_id},
                                                  {'$set' => stats.merge!(warehouse_book_version_id: warehouse_book_version_id,
                                                                          asin: asin,
                                                                          ean: ean,
                                                                          tld: tld,
                                                                          itunes: itunes_id,
                                                                          book_format: book_format,
                                                                          amazon_scraped_at: Time.current.utc.to_s)},
                                                  upsert: true) if stats.present?
    end
  end

  add_transaction_tracer :perform, :category => :task, :params => '{:collection_name => args[0], :mongo_id => args[1], :warehouse_book_version_id => args[2], :asin => args[3], :ean => args[4], :tld => args[5], :itunes_id => args[6], :book_format => args[7]}'

  # returns an array of arrays [[name1, id1, tld1], [name2, id2, tld2], ...]
  # built from the sub_category{1..3}_tree/_id pairs present in stats.
  def get_scraped_category_data_from_stats(stats, tld)
    %w[1 2 3].collect {|num| [stats[:"sub_category#{num}_tree"], stats[:"sub_category#{num}_id"], tld] if stats[:"sub_category#{num}_tree"].present? && stats[:"sub_category#{num}_id"].present?}.compact
  end
end
-
-
1
# Scrapes an Amazon author page once, then fans out one Mongo update job per
# stats document (mongo_id) that shares the author.
class GetAmazonAuthorPageStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :amazon_author_page_scraper

  # mongo_ids - ids of all nightly stats documents that should receive the
  #             scraped author-page stats.
  def perform(collection_name, mongo_ids, tld, author_asin)
    ProxyUtilities.proxy_setup :amazon
    author_page = AmazonAuthorPage.by_asin_and_tld(author_asin, tld)
    if handle_captcha(author_page, 60, collection_name, mongo_ids, tld, author_asin)
      stats = StatsScraper.get_amazon_author_page_stats author_page

      if stats.present?
        stats.merge!(amazon_author_page_scraped_at: Time.current.utc.to_s)
        params = mongo_ids.collect {|mongo_id| [collection_name, mongo_id, stats]}

        Sidekiq::Client.push_bulk 'class' => MongoWorkers::UpdateAmazonAuthorPageStats, 'args' => params
      end
    end
  end
end
-
-
1
class UpdateAmazonAuthorPageStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :mongo_insert

  # Applies previously-scraped author-page stats to a single stats document
  # via a Mongo $set; no-op when there is nothing to write.
  def perform(collection_name, mongo_id, data)
    return unless data.present?

    $mongodb.collection(collection_name).update({_id: mongo_id}, {'$set' => data})
  end
end
-
-
# TODO set up this job to get the lowest print list price, it'll take refactoring one of the scrapes
-
# class GetAmazonLowestPrintListPrice
-
# include NewRelic::Agent::Instrumentation::ControllerInstrumentation
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :captcha_scraper
-
#
-
# def perform(collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format, author_asin)
-
# # If a page has no list price and is a big 6 publisher title and is a kindle edition then manually
-
# # scrape the related titles for the lowest print list price
-
# if stats[:amazon_list_price].blank?
-
# # Queue up lowest print list price job
-
# stats[:amazon_list_price] = StatsScraper.get_amazon_lowest_print_list_price(page, book_format)
-
# end
-
# end
-
# end
-
-
1
# Scrapes nightly Barnes & Noble stats for a single .com book version (by EAN
# or canonical URL) and upserts them into the given Mongo collection.
class GetBarnesAndNobleStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :bn_stat_scraper

  def perform(collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format, canonical_bn_url = nil)
    # B&N stats are only scraped for .com titles that have an EAN.
    return unless ean.present? && tld == '.com'

    ProxyUtilities.proxy_setup :barnes_and_noble
    page = canonical_bn_url.present? ? BnBookPage.new(canonical_bn_url) : BnBookPage.by_ean(ean)

    # sleep for 60 seconds and requeue the job if you get throttled
    if bn_captcha_sleepy_time(page, 60, collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format, canonical_bn_url)
      stats = StatsScraper.get_stats_for_ean page

      $mongodb.collection(collection_name).update({_id: mongo_id},
                                                  {'$set' => stats.merge!(warehouse_book_version_id: warehouse_book_version_id,
                                                                          asin: asin,
                                                                          ean: ean,
                                                                          tld: tld,
                                                                          itunes: itunes_id,
                                                                          book_format: book_format,
                                                                          barnes_and_noble_scraped_at: Time.current.utc.to_s)},
                                                  upsert: true) if stats.present?
    end
  end

  add_transaction_tracer :perform, :category => :task, :params => '{:collection_name => args[0], :mongo_id => args[1], :warehouse_book_version_id => args[2], :asin => args[3], :ean => args[4], :tld => args[5], :itunes_id => args[6], :book_format => args[7], :canonical_bn_url => args[8]}'
end
-
-
1
class GetItunesStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :itunes_stat_scraper

  # Scrapes iTunes stats for a book version and upserts them into the nightly
  # stats collection, tagging the document with identifying metadata.
  def perform(collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format)
    return unless itunes_id.present?

    stats = StatsScraper.get_stats_for_itunes itunes_id, tld
    return unless stats.present?

    metadata = {warehouse_book_version_id: warehouse_book_version_id,
                asin: asin,
                ean: ean,
                tld: tld,
                itunes: itunes_id,
                book_format: book_format,
                itunes_scraped_at: Time.current.utc.to_s}
    $mongodb.collection(collection_name).update({_id: mongo_id}, {'$set' => stats.merge!(metadata)}, upsert: true)
  end

  add_transaction_tracer :perform, :category => :task, :params => '{:collection_name => args[0], :mongo_id => args[1], :warehouse_book_version_id => args[2], :asin => args[3], :ean => args[4], :tld => args[5], :itunes_id => args[6], :book_format => args[7]}'
end
-
-
1
class GetGoodreadsStats
  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
  include Sidekiq::Worker
  sidekiq_options queue: :goodreads_stat_scraper

  # these 2 books are broken on goodreads and we can't figure out a better way
  # to block these errors from occurring
  BROKEN_WAREHOUSE_BOOK_VERSION_IDS = %w[532397 586979].freeze

  # Scrapes Goodreads stats for a book version (by canonical URL, EAN, or
  # ASIN) and upserts them into the nightly stats collection, tagging the
  # document with identifying metadata. No-op when there is no identifier or
  # the title is on the known-broken list.
  def perform(collection_name, mongo_id, warehouse_book_version_id, asin, ean, tld, itunes_id, book_format, canonical_goodreads_url = nil)
    return unless (canonical_goodreads_url || ean || asin).present?
    return if BROKEN_WAREHOUSE_BOOK_VERSION_IDS.include?(warehouse_book_version_id)

    stats = StatsScraper.get_stats_for_goodreads canonical_goodreads_url, (ean || asin), tld
    return unless stats.present?

    $mongodb.collection(collection_name).update({_id: mongo_id},
                                                {'$set' => stats.merge!(warehouse_book_version_id: warehouse_book_version_id,
                                                                        asin: asin,
                                                                        ean: ean,
                                                                        tld: tld,
                                                                        itunes: itunes_id,
                                                                        book_format: book_format,
                                                                        goodreads_scraped_at: Time.current.utc.to_s)},
                                                upsert: true)
  end

  add_transaction_tracer :perform, :category => :task, :params => '{:collection_name => args[0], :mongo_id => args[1], :warehouse_book_version_id => args[2], :asin => args[3], :ean => args[4], :tld => args[5], :itunes_id => args[6], :book_format => args[7], :canonical_goodreads_url => args[8]}'
end
-
-
1
# Scrapes competitive-format coverage for a German (.de) Amazon title: pulls
# the title's competitive stats, records them (keyed by the parent ASIN) in
# the daily de_competitive_format_data collection, and fans out one job per
# related competitive title.
class GermanCompetitiveCoverage
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  def perform(asin, book_format, isbn13, work_id)
    ProxyUtilities.proxy_setup :amazon
    tld = '.de'
    page = AmazonProductPage.by_asin_and_tld asin, tld
    if handle_captcha(page, 60, asin, book_format, isbn13, work_id)
      # Use the page we already scraped above to get competitive stats for this asin to push into mongo
      data = StatsScraper.get_amazon_competitive_stats page

      # Merge asin/isbn13/format in so you know what it is when outputting the report
      data.merge! book_format: book_format, asin: asin, isbn13: isbn13

      # Collect competitive title asins + determine which is featured and then set that in mongo
      competitive_details = page.scrape_competitive_related_format_data

      # If its a valid page set the featured asin, WorkID and data for this asin/format and run through each competitive
      # title and pull its data and put it in mongo.
      if competitive_details[:valid_page]
        # Set the WorkID and push this title's data onto the document and push the featured asin for this format onto the featured array
        MongoUtilities.daily_collection(:de_competitive_format_data, Date.current).update({_id: "#{competitive_details[:parent_asin]}-#{tld}"}, {'$set' => {WorkID: work_id}, '$push' => {rhde_titles: data, featured: competitive_details[:featured_title]}}, upsert: true)

        competitive_details[:competitive_titles].each do |title|
          MongoWorkers::PopulateGermanCompetitiveData.perform_async title[:asin], competitive_details[:parent_asin], book_format, tld
        end
      else
        # merge in work id and add it to the broken titles document array
        data.merge! work_id: work_id
        MongoUtilities.daily_collection(:de_competitive_format_data, Date.current).update({_id: 'broken_titles'}, {'$push' => {titles: data}}, upsert: true)
      end
    end
  end
end
-
-
1
class PopulateGermanCompetitiveData
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # Scrapes competitive stats for one competitive-title ASIN and pushes them
  # onto the parent title's document in the daily de_competitive_format_data
  # collection.
  def perform(asin, parent_asin, book_format, tld)
    ProxyUtilities.proxy_setup :amazon
    product_page = AmazonProductPage.by_asin_and_tld asin, tld
    return unless handle_captcha(product_page, 60, asin, parent_asin, book_format, tld)

    competitive_stats = StatsScraper.get_amazon_competitive_stats product_page
    return unless competitive_stats.present?

    # Tag the stats so the report knows which asin/format they belong to.
    competitive_stats.merge! book_format: book_format, asin: asin

    MongoUtilities.daily_collection(:de_competitive_format_data, Date.current).update({_id: "#{parent_asin}-#{tld}"}, {'$push' => {competitive_titles: competitive_stats}}, upsert: true)
  end
end
-
end
-
1
class ReportGeneratorWorker
  include Sidekiq::Worker
  sidekiq_options queue: :heavy_reporting

  # Instantiates the configured report cards and runs the report generator
  # for the given date/tld, recording the wall-clock run time in Redis.
  #
  # report_card_config - hash of report name => constructor parameter array.
  def perform(report_date_string, tld, report_card_config)
    report_date = report_date_string.to_date
    report_cards = report_card_config.each_pair.collect do |report_name, parameters|
      klass = ReportCards::ReportCard.report_card_class_by_report_name(report_name)
      klass.new(*parameters) if klass.present?
    end.compact
    return unless report_cards.present?

    elapsed = Benchmark.realtime {ReportGenerator.run(report_date, tld, report_cards: report_cards)}
    $redis.set('report_generator_run_time', elapsed)
  end
end
-
1
# Orchestrates the nightly pipeline for one day: drives it through the STAGES
# state machine, persisting the current stage in the Mongo scheduler
# collection so a restarted job resumes where it left off.
class Scheduler
  include Sidekiq::Worker
  sidekiq_options queue: :high

  # Pipeline stages in execution order; 'done' is terminal.
  STAGES = %w[starting scrape_stats etl secondary_etl analyze post_etl status_report wrapup done].freeze

  # Drops the persisted state for a day so its pipeline can be rerun from scratch.
  def self.hard_reset_daily_scrape_monitor(date_string)
    MongoUtilities.scheduler_collection.remove _id: date_string
  end

  def perform(date_string)
    @date = date_string.to_date

    # A scheduler date in the past means the pipeline never finished that day.
    if @date < Date.current
      Rails.logger.tagged('scheduler') {Rails.logger.info 'FAILED FOR A FULL DAY! GET TO WORK, GUSY!!!1'}
      return
    end

    initialize_scheduler_state
    return if @current_stage == STAGES.last

    # Run stages until the state machine reaches the terminal stage.
    loop do
      process_stage
      break if transition == STAGES.last
    end
  end

  # Loads (or creates) today's scheduler document and sets @current_stage.
  def initialize_scheduler_state
    doc = MongoUtilities.scheduler_collection.find(_id: @date.to_s).limit(1).first
    if doc.present?
      @current_stage = doc.fetch('stage', STAGES.first)
    else
      @current_stage = STAGES.first
      MongoUtilities.scheduler_collection.insert _id: @date.to_s, stage: @current_stage
    end
  end

  # Executes the work for the current stage. Each queue_* helper blocks until
  # its prerequisite jobs have drained before queueing the next batch.
  def process_stage
    case @current_stage
    when 'starting'
      MongoUtilities.scheduler_collection.update({_id: @date.to_s}, {'$set' => {'started_at' => Time.current.to_s}})
      MongoUtilities.daily_collection(:stats).ensure_index({warehouse_book_version_id: 1}, name: 'book_version_id_index')
    when 'scrape_stats'
      queue_stats
    when 'etl'
      queue_etl
    when 'secondary_etl'
      queue_secondary_etl
    when 'analyze'
      queue_analyze
    when 'post_etl'
      queue_post_etl
    when 'status_report'
      queue_status_report
    when 'wrapup'
      queue_wrapup
      MongoUtilities.scheduler_collection.update({_id: @date.to_s}, {'$set' => {'completed_at' => Time.current.to_s}})
    else
      # Unknown stage in the persisted document -- fail loudly.
      raise 'Uh-oh!'
    end
  end

  # Advances @current_stage to the next stage (persisting it) and returns it;
  # returns the terminal stage unchanged once it has been reached.
  def transition
    return @current_stage if @current_stage == STAGES.last

    @current_stage = STAGES[STAGES.index(@current_stage) + 1]
    MongoUtilities.scheduler_collection.update({_id: @date.to_s}, {'$set' => {stage: @current_stage}})

    @current_stage
  end

  # Resets per-source scrape counters and queues the nightly scrape jobs.
  def queue_stats
    # TODO: create portlist, we can make the 200 a constant eventually once we settle on this
    RedisUtilities.populate_port_list(200)

    # queue all jobs
    [:amazon_statable, :itunes_statable, :bn_statable].each do |key|
      RedisUtilities.set_count RedisUtilities.get_scrape_count_key(key), 0
      BookVersionWorkers::QueueNightlyScrape.perform_async key
    end

    # RedisUtilities.set_count RedisUtilities.get_scrape_count_key(:amazon_author_page), 0
    # BookVersionWorkers::QueueAmazonAuthorPageScrape.perform_async
  end

  # Waits for scrapes to finish, then kicks off the primary ETL jobs.
  def queue_etl
    wait_on_jobs [BookVersionWorkers::QueueNightlyScrape, BookVersionWorkers::QueueNightlyScrapeBlock, BookVersionWorkers::QueueAmazonAuthorPageScrape, BookVersionWorkers::QueueAmazonAuthorPageScrapeBlock], 300, 1

    clear_stale_jobs
    EtlWorkers::BookCategoryEtl.perform_async
    MaintenanceWorkers::ClearStaleWorkers.perform_async
    EtlWorkers::QueueNightlyEtl.perform_async
  end

  # Waits for primary ETL, then runs the category stats ETL.
  def queue_secondary_etl
    wait_on_jobs [EtlWorkers::BookCategoryEtl, EtlWorkers::QueueNightlyEtl, EtlWorkers::NightlyStatsEtl, EtlWorkers::NightlyListStatEtl], 300, 1

    clear_stale_jobs
    EtlWorkers::CategoryStatsEtl.perform_async Date.current.to_s
  end

  # Waits for category ETL, then queues the Postgres ANALYZE maintenance job.
  def queue_analyze
    wait_on_jobs [EtlWorkers::CategoryStatsEtl], 300, 1

    clear_stale_jobs
    MaintenanceWorkers::RunPostgresAnalyze.perform_async
  end

  # Waits for ANALYZE, then queues report generation and daily stat jobs.
  def queue_post_etl
    wait_on_jobs [MaintenanceWorkers::RunPostgresAnalyze], 300, 1

    # report config = {email, ftp, gzip, report blocking}
    ReportGeneratorWorker.perform_async Date.current.to_s, '.com', {daily_apple: [true, true, false, true], corporate: [true, true, false, true], corporate2: [true, true, false, true]}

    BooklrStatWorkers::CreateBooklrStat.perform_async Date.current.to_s
    EnterpriseReports::DailyReports::RHPG.perform_async Date.current.to_s
  end

  # Waits for reports/stats, then queues the nightly backup and status report.
  def queue_status_report
    wait_on_jobs [BooklrStatWorkers::CreateBooklrStat, BooklrStatWorkers::SetNumberOfBookVersions, BooklrStatWorkers::SetNumberOfIngestedBookVersions, ReportGeneratorWorker], 300, 1

    BackupWorkers::NightlyMongo.perform_async
    BooklrStatWorkers::DailyBooklrStatReport.perform_async
  end

  # Waits for the status report, then clears the daily report stats key.
  def queue_wrapup
    wait_on_jobs [BooklrStatWorkers::DailyBooklrStatReport], 300, 1
    $redis.del 'daily_report_stats'
  end

  # Polls until none of the given job classes remain in Sidekiq and the total
  # queue backlog has dropped to queue_count or fewer jobs.
  def wait_on_jobs(jobs, time_to_wait, queue_count)
    sleep time_to_wait while Utilities.class_in_sidekiq?(jobs) || Sidekiq::Queue.all.sum(&:size) > queue_count
  end

  # Clears sleeping/phantom workers from each scraper queue before the next
  # stage starts.
  def clear_stale_jobs
    %w[bn_stat_scraper optimized_scraper amazon_author_page_scraper itunes_stat_scraper goodreads_stat_scraper].each do |queue|
      RedisUtilities.clear_sleeping_and_phantom_workers(queue, 3000)
    end
  end
end
-
1
module ScrapeTestWorkers
-
1
# Hits a single page with a given scraper page class and tallies, per
# scrape_* method, how often it returned data. Tallies accumulate in a daily
# scrape_tests Mongo collection so scraper selector rot can be spotted.
class RunScrapeTest
  include Sidekiq::Worker
  sidekiq_options queue: :captcha_scraper

  def perform(page_class, mongo_id, url, user_agent = nil)
    klass = page_class.constantize
    page = user_agent.present? ? klass.new(url, user_agent) : klass.new(url)
    # All public scrape_* methods that returned something for this page.
    method_names = klass.public_instance_methods(false).select {|method| method.to_s.starts_with?('scrape_')}.select do |scrape_method|
      page.send(scrape_method).present?
    end

    if page.ok?
      if page_class == 'AmazonProductPage' && page.captcha?
        ScrapeTestWorkers::RunScrapeTest.perform_async page_class, mongo_id, url, user_agent
        # NOTE(review): bare `sleep` blocks this worker thread forever after
        # the requeue, so the captcha page is never tallied below -- confirm
        # this is intentional.
        sleep
      end
      ScrapeTestWorkers::RunScrapeTest.collection.update({_id: mongo_id},
                                                         {'$inc' => method_names.each_with_object({}) {|method_name, hash| hash["#{method_name}.count"] = 1}},
                                                         upsert: true)
    else
      ScrapeTestWorkers::RunScrapeTest.collection.update({_id: mongo_id},
                                                         {'$inc' => {dead_page: 1}},
                                                         upsert: true)
    end
  end

  # Daily Mongo collection holding the scrape-test tallies.
  def self.collection
    $mongodb["scrape_tests_#{Date.current.to_s.underscore}"]
  end

  # Samples a fraction of statable book versions per store/tld and queues a
  # scrape test for each sampled page.
  def self.queue
    amazon_com_count = (WarehouseBookVersion.amazon_statable.com.count * 0.01).ceil
    amazon_couk_count = (WarehouseBookVersion.amazon_statable.couk.count * 0.5).ceil
    bn_count = (WarehouseBookVersion.bn_statable.count * 0.01).ceil
    values = []

    mongo_id = "#{AmazonProductPage.to_s.underscore}_com"
    WarehouseBookVersion.amazon_statable.com.value_of(:asin).sample(amazon_com_count).each do |asin|
      values << [AmazonProductPage.to_s, mongo_id, Urls.amazon_book_page(asin, '.com')]
    end
    collection.update({_id: mongo_id}, {'$set' => {total_scrapes: amazon_com_count}}, upsert: true)

    mongo_id = "#{AmazonProductPage.to_s.underscore}_couk"
    WarehouseBookVersion.amazon_statable.couk.value_of(:asin).sample(amazon_couk_count).each do |asin|
      values << [AmazonProductPage.to_s, mongo_id, Urls.amazon_book_page(asin, '.couk')]
    end
    collection.update({_id: mongo_id}, {'$set' => {total_scrapes: amazon_couk_count}}, upsert: true)

    mongo_id = BnBookPage.to_s.underscore
    # NOTE(review): samples value_of(:asin) but the block variable is `ean`
    # and it is fed to Urls.bn_book_page -- confirm the intended column.
    WarehouseBookVersion.bn_statable.value_of(:asin).sample(bn_count).each do |ean|
      values << [BnBookPage.to_s, mongo_id, Urls.bn_book_page(ean)]
    end
    collection.update({_id: mongo_id}, {'$set' => {total_scrapes: bn_count}}, upsert: true)

    Sidekiq::Client.push_bulk('class' => ScrapeTestWorkers::RunScrapeTest, 'args' => values)
  end
end
-
-
1
# Samples how often an Amazon description scrape succeeds: bumps a redis
# counter on success and records the description length in a redis set.
class DescriptionCoverage
  include Sidekiq::Worker
  sidekiq_options queue: :scraper

  def perform(asin, tld)
    page = AmazonProductPage.by_asin_and_tld asin, tld
    return unless page.ok?

    description = page.scrape_amazon_description
    # if description successful, incr descSuccess by 1, or set 1 if key not found
    return if description.blank?

    $redis.incr('desc_success')
    # max character counter
    $redis.sadd('desc_length_counts', description.size)
  end
end
-
-
#class BnScrapeTest
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :spider
-
#
-
# def perform(isbn13)
-
# agent = Mechanize.new
-
# agent.user_agent_alias = 'Windows IE 6'
-
# page = agent.get(Urls.bn_book_page(isbn13)).parser
-
#
-
# if page.present?
-
# $redis.sadd('present_pages', isbn13)
-
# BnBookPageScraper.get_price(page).present? ? $redis.sadd('present_pages_with_price', isbn13) : $redis.sadd('present_pages_no_price', isbn13)
-
# else
-
# $redis.sadd('null_pages', isbn13)
-
# end
-
# end
-
#end
-
-
#class BnScrapeWithRescueTest
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :spider
-
#
-
# def perform(isbn13_or_url)
-
# isbn13 = isbn13_or_url.length == 13 ? isbn13_or_url : isbn13_or_url.split('isbn=').last
-
# begin
-
# agent = Mechanize.new
-
# agent.user_agent_alias = 'Mac FireFox'
-
# page = isbn13_or_url.length == 13 ? agent.get(Urls.bn_book_page(isbn13)).parser : agent.get(isbn13_or_url).parser
-
#
-
# if page.present?
-
# $redis.sadd('present_pages', isbn13)
-
# BnBookPageScraper.get_price(page).present? ? $redis.sadd('present_pages_with_price', isbn13) : $redis.sadd('present_pages_no_price', isbn13)
-
# else
-
# $redis.sadd('null_pages', isbn13)
-
# end
-
# rescue *HTTP_ERRORS => e
-
# $redis.sadd('null_pages', isbn13)
-
# $redis.incr(e.class.to_s)
-
# Rails.logger.tagged('httperror') {Rails.logger.info "#{isbn13} - http error: #{e}"}
-
# end
-
# end
-
#end
-
end
-
1
module SpiderWorkers
-
1
class QueueCategorySpidering
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Fans out CollectAsins jobs: one per (page, category) pair for every
  # canonical Amazon category whose name matches `base_category_name`.
  #
  # base_category_name - SQL LIKE pattern matched against category names
  # recent             - whether CollectAsins should restrict to recent items
  # pages_to_scrape    - number of search result pages to spider per category
  def perform(base_category_name, recent, pages_to_scrape)
    Rails.logger.tagged('spidering') {Rails.logger.info "'#{base_category_name}' Scrape Started - recent check enabled?: #{recent}"}

    # Bind the pattern instead of interpolating it into the SQL string
    # (the original interpolation was a SQL injection risk).
    category_ids = WarehouseCategory.amazon.canonical.where('name LIKE ?', base_category_name).value_of(:category_id)
    params = category_ids.flat_map do |category_id|
      (1..pages_to_scrape).collect {|count| [count, category_id, recent]}
    end

    # Push in manageable slices so a huge argument list doesn't overwhelm redis.
    params.each_slice(12000) do |value_slice|
      Sidekiq::Client.push_bulk('class' => SpiderWorkers::CollectAsins, 'args' => value_slice)
    end
    Rails.logger.tagged('spidering') {Rails.logger.info "'#{base_category_name}' Scrapes Finished Queueing - recent check enabled?: #{recent}"}
  end
end
-
-
1
# Spiders a single Amazon search-result page for a category and stashes any
# newly-seen ASINs in the 'new_asins' redis set for later validation/ingestion.
class CollectAsins
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  def perform(page_num, category_id, recent)
    ProxyUtilities.proxy_setup :amazon

    url = "http://www.amazon.com/s?ie=UTF8&page=#{page_num}&rh=n:#{category_id}"
    url += ',p_n_date:1249100011' if recent

    page = AmazonSearchPage.new url
    return unless handle_captcha(page, 60, page_num, category_id, recent)

    spidered_asins = page.scrape_search_result_asins
    return if spidered_asins.blank?

    # clean out anything that isn't a valid asin or already exists on our master list
    valid_asins = spidered_asins.reject {|asin| Utilities.determine_key_type(asin).nil?}
    asins_to_add = valid_asins.reject {|asin| MongoUtilities.exists_in_all_asin_list? asin, '.com'}

    # add the rest to the redis list which will get consumed later
    $redis.sadd('new_asins', asins_to_add) if asins_to_add.present?
  end
end
-
-
1
class QueueValidateAndIngestAsins
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Queues validation and ingestion jobs for the spidered asins that result from QueueCategorySpidering
  # if count is enabled it will set a flag that the queued jobs just do counts
  # if check_asins is true it will make sure not to validate any books we already have
  #
  # format      - book format string that ValidateAndIngestAsins will match against
  # check_asins - skip (and purge from redis) asins already present in WarehouseBookVersion
  # count       - dry-run flag forwarded to each queued job
  def perform(format, check_asins = true, count = false)
    Rails.logger.tagged('spidering') {Rails.logger.info "#{format} validation and ingestion started on #{$redis.scard('new_asins')} new asins"}

    # get all asins scraped from spidering and subtract the existing asins from this list to get the uningested list
    asins = $redis.smembers('new_asins')
    asins_uningested = check_asins ? asins - WarehouseBookVersion.com.where(asin: asins).value_of(:asin) : asins

    #subtract the uningested ones from the redis list to determine which ones to remove from redis
    to_remove = asins - asins_uningested
    $redis.srem('new_asins', to_remove) if check_asins && to_remove.present?

    # One [asin, format, count] job per uningested asin, pushed in BLOCK_SIZE slices.
    asins_uningested.collect {|asin| [asin, format, count]}.each_slice(RedisUtilities::BLOCK_SIZE) do |value_slice|
      Sidekiq::Client.push_bulk('class' => SpiderWorkers::ValidateAndIngestAsins, 'args' => value_slice)
    end
    Rails.logger.tagged('spidering') {Rails.logger.info "#{format} validation and ingestion finished queuing, #{$redis.scard('new_asins')} new asins remain"}
  end
end
-
-
1
# Validates a single spidered asin (format match + sales rank under 2M) and
# either ingests it or files it away. With `count` set it only tallies results.
class ValidateAndIngestAsins
  include Sidekiq::Worker
  sidekiq_options queue: :optimized_scraper

  # validate that each book is valid and below 2m rank before ingesting (or increasing count)
  def perform(asin, format, count = false)
    ProxyUtilities.proxy_setup :amazon
    tld = '.com'

    page = AmazonProductPage.new Urls.amazon_book_page(asin, tld), 'Windows IE 6'
    return unless handle_captcha(page, 60, asin, format, count)

    rank = page.scrape_sales_rank
    scraped_format = page.scrape_book_format

    # if a asin passes create a new book version and remove it from the redis set
    # else remove it from the redis set and add it to the overflow set
    # if count is set just incr counts and dont do any real work
    valid = scraped_format.present? && scraped_format.include?(format) && rank.to_i < 2000000

    if count
      $redis.incr(valid ? 'valid_asins' : 'invalid_asins')
    elsif valid
      Sidekiq::Client.push('queue' => 'background', 'class' => 'BookVersionWorkers::Create', 'args' => [{asin: asin, book_format: scraped_format, source: 'amazon-spidering', status: 'validated', tld: tld}])
      $redis.srem 'new_asins', asin
    else
      $redis.srem 'new_asins', asin
      $redis.sadd 'filtered_asins', asin
    end
  end
end
-
end
-
1
module WarehouseCategoryWorkers
-
1
# Creates an Amazon WarehouseCategory from the given attributes unless an
# identical (name, category_id, tld) record already exists. Depth and parent
# are derived from the category name before creation.
class CreateAmazon
  include Sidekiq::Worker
  sidekiq_options queue: :high

  def perform(params)
    params = params.with_indifferent_access
    return if WarehouseCategory.amazon.where(name: params[:name], category_id: params[:category_id], tld: params[:tld]).exists?

    params[:depth] = Utilities.get_depth_from_category_name params[:name]
    params[:parent_id] = WarehouseCategory.get_parent_id_for_category_name params[:name], params[:tld]
    WarehouseCategory.amazon.create! params
  end
end
-
-
1
class QueueScrapes
  include Sidekiq::Worker
  sidekiq_options queue: :scheduling

  # Refreshes the Amazon category collection and queues one
  # AmazonCategoryScraper job per acceptable base category.
  def perform
    AmazonCategoryCollection.new.prepare_for_scraping

    # value_of with multiple columns already returns [category_id, name, tld]
    # triples, so the identity `collect` the original performed was redundant.
    params = WarehouseCategory.amazon.where(name: AmazonBestSellersPage::ACCEPTABLE_BASE_CATEGORIES).value_of(:category_id, :name, :tld)
    Sidekiq::Client.push_bulk 'class' => MongoBookCategoryWorkers::AmazonCategoryScraper, 'args' => params
  end
end
-
-
1
# Syncs WarehouseCategory statuses with the scraped Amazon category collection:
# 1. groups scraped categories by status,
# 2. marks DW categories missing from the scrape as deleted,
# 3. bulk-updates statuses in 100-condition slices,
# 4. re-links alternative/deleted categories to their canonical counterpart.
class UpdateStatusAmazon
  include Sidekiq::Worker
  sidekiq_options queue: :sync

  def perform
    arel_table = WarehouseCategory.arel_table
    scraped_category_data = AmazonCategoryCollection.new.find.to_a

    # Group scraped category data by status
    scraped_category_data_by_status = scraped_category_data.each_with_object({alternative: [], deleted: [], canonical: []}.with_indifferent_access) do |data, hash|
      hash[data['status']] << [data['category_id'], data['category_name'], data['tld']]
    end

    # Add categories found in the DW but not in the category collection to the deleted group
    # NOTE(review): the `where{...}` block conditions look like Squeel syntax — confirm the gem is loaded.
    dw_category_data = WarehouseCategory.amazon.where{status != 'deleted'}.value_of(:category_id, :name, :tld)
    data_to_match = scraped_category_data.collect {|data| [data['category_id'], data['category_name'], data['tld']]}
    (dw_category_data - data_to_match).each do |data|
      scraped_category_data_by_status[:deleted] << data
    end

    # Update statuses
    scraped_category_data_by_status.each_pair do |status, data|
      # One Arel equality condition per (category_id, name, tld) triple, OR-ed together below.
      conditions = data.collect do |category_id, name, tld|
        arel_table[:category_id].eq(category_id).and(arel_table[:name].eq(name)).and(arel_table[:tld].eq(tld))
      end
      # Need to break up the update call into manageable slices for the DB
      conditions.each_slice(100) do |condition_slice|
        final_condition = condition_slice.reduce {|intermediate, condition| intermediate.or(condition)}
        timestamp = Time.current
        updated_fields = {status: status, updated_at: timestamp}
        updated_fields[:canonical_category_id] = nil if status == 'canonical' # Remove canonical category association for any canonical category
        WarehouseCategory.amazon.where(final_condition).update_all(updated_fields)
      end
    end

    # Update all alternative and deleted categories with their canonical_category
    non_canonical_categories = WarehouseCategory.amazon.where{(status == 'alternative') | (status == 'deleted')}.includes(:canonical_category)
    # Lookup of canonical record ids keyed by tld then category_id (one sub-hash per TLD).
    canonical_category_ids_by_category_id_and_tld = WarehouseCategory.amazon.canonical.where(category_id: non_canonical_categories.collect(&:category_id)).each_with_object(Hash[Utilities::TLDS.zip(Array.new(Utilities::TLDS.count) {Hash.new})]) do |category, hash|
      hash[category.tld][category.category_id] = category.id
    end
    non_canonical_categories.each do |category|
      # Only relink when the association is missing or points at a different category_id.
      if category.canonical_category.blank? || category.canonical_category.category_id != category.category_id
        canonical_category_id = canonical_category_ids_by_category_id_and_tld[category.tld][category.category_id]
        if canonical_category_id.present?
          category.canonical_category_id = canonical_category_id
          category.save
        end
      end
    end
  end
end
-
-
1
# Derives and persists the parent for a single warehouse category.
# save! raises on an invalid record so the failure surfaces in Sidekiq.
class FillInWarehouseCategoryParent
  include Sidekiq::Worker
  sidekiq_options queue: :high

  def perform(warehouse_category_id)
    category = WarehouseCategory.find(warehouse_category_id)
    category.set_parent_id
    category.save!
  end
end
-
-
#TODO Needs to be redone very slightly, BarnesNobleBookCategory no longer exists
-
#class FindNewTitles
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :background
-
#
-
# def perform(bn_book_category_id)
-
# ActiveRecord::Base.connection.uncached do
-
# bn_book_category = BarnesNobleBookCategory.find bn_book_category_id
-
# book_formats = bn_book_category.physical_category? ? %w[Hardcover Paperback] : ["NOOK Book"]
-
#
-
# (1..11).each do |page_number|
-
# book_formats.each do |book_format|
-
# page = BnCategoryPageScraper.get_page bn_book_category.category_id, book_format, page_number
-
#
-
# ean_list = []
-
#
-
# page.css("li.result").each do |li|
-
# div = li.css("div")
-
# ean = div.first["data-bn-ean"]
-
# format = div.css("div.price-format > a > span.format").text
-
# ean_list << ean if ISBN_Tools.is_valid_isbn13?(ean) && format.include?(book_format)
-
# end
-
#
-
# existing_isbns = BookVersion.where(isbn13: ean_list, tld: '.com').collect(&:isbn13)
-
# creations = ean_list - existing_isbns
-
#
-
# # Turn this into a batch insert if we want to optimize, persist format as it is unless its Nook then use Kindle Edition
-
# book_format_persist = book_format.include?("NOOK Book") ? "Kindle Edition" : book_format
-
# count = $redis.get("creation_count").to_i
-
# $redis.set("creation_count",count+creations.count) unless creations.blank?
-
# #creations.each {|ean| BookVersion.create(isbn13: ean, source: 'bn-category-spidering', book_format: book_format_persist)}
-
# end
-
# end
-
# end
-
# end
-
#end
-
-
# TODO: BROKEN, calls AmazonBookCategory, wtf?
-
#class CreateSubcategories
-
# include Sidekiq::Worker
-
# sidekiq_options queue: :high
-
#
-
# def perform(bn_book_category_id)
-
# ActiveRecord::Base.connection.uncached do
-
# bn_book_category = BarnesNobleBookCategory.find bn_book_category_id
-
# page = BnCategoryPageScraper.get_page bn_book_category.category_id, nil, nil
-
#
-
# # Check if sub categories exist before we try to create categories
-
# if page.css('.search-filter ul.filter-value-set').present? && page.css('.search-filter div.filter-name').first.text.include?("In ")
-
# # first ul.filter-value-set is the category block, rest are other aref filters
-
# page.css('.search-filter ul.filter-value-set').first.css('li > a').each do |element|
-
# category_id = URI.unescape(element['href']).split('dref=').last
-
# category_name = element.text.strip
-
# depth = category_id.count ","
-
# bn_book_category = BarnesNobleBookCategory.where(category_id: category_id, parent_id: bn_book_category_id, name: category_name, depth: depth).first_or_create unless category_id.blank? || AmazonBookCategory.where(category_id: category_id).exists?
-
# BarnesNobleListStatWorkers::CreateSubcategories.perform_async bn_book_category.id
-
# end
-
# end
-
# end
-
# end
-
#end
-
end
-
1
# App-wide configuration holder. Read and written through the generated
# AmazeBot.config accessor (ActiveSupport's mattr_accessor); starts empty.
module AmazeBot
  mattr_accessor :config
  self.config = {}
end
-
1
# Minimal bitmask attribute support for ActiveRecord models: an integer column
# stores a set of named values, bit i corresponding to values[i].
module Bitmaskable
  # Describes one bitmask-backed attribute: the integer column (`attribute`),
  # the ordered value names (`values`) and options (currently :as — the name
  # of the generated reader/writer pair, defaulting to the column name).
  class Definition
    attr_reader :attribute, :values, :extension

    def initialize(attribute, values = [], options = {})
      @attribute = attribute
      @values = values
      @options = options
    end

    # Defines the reader/writer pair on the given model class.
    def install_on(model)
      override model
    end

    #######
    private
    #######

    # Generates the accessor pair. The reader decodes the integer column into
    # the subset of `values` whose bit is set; the writer encodes an array of
    # names back into the column via write_attribute (unknown names ignored).
    def override(model)
      # override getter
      model.class_eval %(
        def #{@options[:as] || attribute}
          #{@values}.reject {|r| ((#{attribute} || 0) & (1 << #{@values}.index(r))).zero? }
        end
      )

      # override setter
      model.class_eval %(
        def #{@options[:as] || attribute}=(roles)
          string_roles = Array.wrap(roles).map {|t| t.to_s.downcase}
          send :write_attribute, :#{attribute}, ((string_roles & #{@values}).map { |r| 1 << #{@values}.index(r) }.sum)
        end
      )
    end
  end

  module ClassMethods
    # Declares a bitmask attribute, e.g.
    #   bitmask :roles_mask, values: %w[admin user], as: :roles
    # Raises ArgumentError unless an Array :values option is supplied.
    def bitmask(attribute, options={})
      # BUGFIX: the message used to say ":as option" although the check is on :values.
      raise ArgumentError, "Must provide an Array :values option" unless options[:values] && options[:values].kind_of?(Array)

      bitmask_definitions[attribute] = Bitmaskable::Definition.new(attribute, options.delete(:values).to_a, options)
      bitmask_definitions[attribute].install_on(self)
    end

    # Registry of bitmask definitions declared on this class, keyed by attribute.
    def bitmask_definitions
      @bitmask_definitions ||= {}
    end
  end
end
-
-
1
# Expose `bitmask` as a class macro on every ActiveRecord model.
ActiveRecord::Base.extend Bitmaskable::ClassMethods
-
1
# Extensions reopening ActiveRecord::Base: lightweight column plucking
# (value_of), a raw multi-row INSERT (batch_insert) and Arel-based
# join/select helpers used by the reporting queries.
module ActiveRecord
  class Base
    # Bare attr_accessible: nothing is mass-assignable by default (Rails 3 API).
    attr_accessible

    # Plucks one or more columns without instantiating model objects,
    # type-casting each value through the column definition. Returns a flat
    # array for a single column, otherwise an array of row arrays.
    def self.value_of(*args)
      columns = args.each_with_object({}) {|arg, hash| hash[arg] = columns_hash[arg.to_s]}
      values = connection.execute(connection.unprepared_statement {select(args).to_sql}).collect do |res|
        args.collect {|arg| columns[arg].type_cast(res[arg.to_s])}
      end

      args.count == 1 ? values.flatten : values
    end

    # Inserts many rows with a single INSERT statement. `array_of_attributes`
    # is an array of rows, each ordered to match `column_names`. STI `type`
    # and `created_at`/`updated_at` are filled in automatically when the
    # table has them and the caller did not supply them. Returns inserted ids.
    # Raises ArgumentError for unknown columns or ragged rows.
    def self.batch_insert(column_names, array_of_attributes)
      unless column_names.all? {|column_name| self.column_names.include? column_name}
        raise ArgumentError.new("Column(s) provided nonexistent: #{column_names.collect {|column_name| column_name unless self.column_names.include? column_name}.compact.join(', ')}")
      end
      raise ArgumentError.new('Wrong number of values to insert') unless array_of_attributes.all? {|attributes| attributes.size == column_names.size}
      # Deep copy so the appends below never mutate the caller's arrays.
      local_column_names = Marshal.load(Marshal.dump(column_names))
      local_array_of_attributes = Marshal.load(Marshal.dump(array_of_attributes))
      if columns_hash['type'].present? && !local_column_names.include?('type')
        local_column_names << 'type'
        local_array_of_attributes.each {|attributes| attributes << to_s}
      end
      timestamp = Time.current
      if columns_hash['created_at'].present? && !local_column_names.include?('created_at')
        local_column_names << 'created_at'
        local_array_of_attributes.each {|attributes| attributes << timestamp}
      end
      if columns_hash['updated_at'].present? && !local_column_names.include?('updated_at')
        local_column_names << 'updated_at'
        local_array_of_attributes.each {|attributes| attributes << timestamp}
      end
      columns = local_column_names.map {|name| columns_hash[name.to_s]}
      columns_sql = "(#{local_column_names.map{|name| connection.quote_column_name(name) }.join(',')})"
      insert_sql = "INSERT INTO #{quoted_table_name} #{columns_sql} VALUES "
      values_sql = local_array_of_attributes.map do |arr|
        my_values = arr.each_with_index.map do |val,j|
          column = columns[j]
          # nil primary keys draw from the sequence (sequence-based adapters);
          # everything else goes through connection.quote, so values are escaped.
          (!sequence_name.blank? && column.name == primary_key && val.nil?) ? %{#{sequence_name}.nextval} : connection.quote(column.type_cast(val), column)
        end
        "(#{my_values.join(',')})"
      end
      sql2insert = insert_sql + values_sql.join( ',' )
      sql, binds = connection.sql_for_insert(connection.to_sql(sql2insert), nil, nil, nil, [])
      res = connection.exec_insert sql, 'Batch Insert', binds
      res.rows.flatten.collect(&:to_i)
    end

    # LEFT OUTER JOIN each named association (aliased to the association name).
    def self.outer_joins(*args)
      joins(args.flatten.collect {|association_name| generate_join(Arel::Nodes::OuterJoin, association_name)})
    end

    # Selects fields from joined associations, aliasing each as
    # "<association>_<field>" and optionally applying a named transform
    # (currently :price — integer cents truncated to dollars) to price fields.
    def self.join_select(join_type, auto_transform, join_selects)
      # BUGFIX: these guards used `throw`, which is for catch/throw control flow
      # and raised UncaughtThrowError; `raise` is the intended mechanism.
      raise ArgumentError unless %w[inner outer].include?(join_type.to_s)
      raise ArgumentError unless join_selects.is_a?(Array) || join_selects.is_a?(Hash)

      select(join_selects.collect do |association_name, fields_or_transforms|
        table = reflect_on_association(association_name).klass.arel_table
        table = table.alias(association_name) if join_type.to_s == 'outer'
        fields_or_transforms.collect do |field_or_transform|
          field, transform = field_or_transform
          transform ||= {}
          transform.reverse_merge!(as: "#{association_name}_#{field}")
          if auto_transform
            transform.reverse_merge! transform_name: :price if field.to_s.include?('price')
          end
          transform_select_statement table, field, transform
        end
      end.flatten) # non-bang flatten: flatten! returns nil when there is nothing to flatten
    end

    # Like join_select but on the model's own table: selects fields with
    # optional per-field transforms, auto-transforming price fields when asked.
    def self.transforming_select(auto_transform, fields_or_transforms = {})
      raise ArgumentError unless fields_or_transforms.is_a?(Array) || fields_or_transforms.is_a?(Hash)

      select(fields_or_transforms.collect do |field_or_transform|
        field, transform = field_or_transform
        transform ||= {}
        if auto_transform
          transform.reverse_merge! transform_name: :price if field.to_s.include?('price')
        end
        transform_select_statement arel_table, field, transform
      end)
    end

    # Builds one relation combining base-table selects with inner and outer
    # joined association selects. '*' selects all base columns; each joins hash
    # maps association name => select values (see generate_select).
    def self.single_query_join_and_select(base_model_select_statements, inner_joins, outer_joins)
      joins = []
      select_statements = base_model_select_statements == '*' ? [arel_table[Arel.star]] : Array.wrap(base_model_select_statements.try(:dup))

      inner_joins.each_pair do |association_name, select_values|
        joins << generate_join(Arel::Nodes::InnerJoin, association_name)
        select_statements << generate_select(select_values, association_name) unless select_values.blank?
      end if inner_joins.present?

      outer_joins.each_pair do |association_name, select_values|
        joins << generate_join(Arel::Nodes::OuterJoin, association_name)
        select_statements << generate_select(select_values, association_name) unless select_values.blank?
      end if outer_joins.present?

      select(select_statements).joins(joins)
    end

    # NOTE(review): `private` below does not affect `def self.` singleton
    # methods; the helpers remain publicly callable. Left as-is to avoid
    # breaking any external callers.
    private

    # Arel join node for the association, aliased to the association name,
    # with the ON condition derived from the reflection's key direction.
    def self.generate_join(join_type, association_name)
      table = arel_table
      reflection = reflect_on_association(association_name)
      join_table = reflection.klass.arel_table.alias(association_name)
      condition = reflection.belongs_to? ? join_table[reflection.active_record_primary_key].eq(table[reflection.foreign_key]) : join_table[reflection.foreign_key].eq(table[reflection.active_record_primary_key])
      table.create_join(join_table, table.create_on(condition), join_type)
    end

    # Applies the named transform to a field select, aliased via :as (defaults
    # to the field name). :price truncates integer cents to two-decimal dollars.
    def self.transform_select_statement(table, field_name, transform_details)
      return table[field_name] if transform_details.blank?

      case transform_details[:transform_name]
      when :price
        Arel::Nodes::NamedFunction.new('trunc', [table[field_name] / 100.0, 2])
      else
        table[field_name]
      end.as Arel.sql("\"#{(transform_details[:as].presence || field_name)}\"")
    end

    # Select list for a joined association: a raw SQL string is passed through;
    # "!column" entries select everything except the named columns; otherwise
    # each listed column is selected aliased as "<association>_<column>".
    def self.generate_select(select_values, association_name)
      return if select_values.blank?

      reflection = reflect_on_association(association_name)
      join_table = reflection.klass.arel_table.alias(association_name)
      if select_values.is_a? String
        select_values
      elsif select_values.all? {|select_value| select_value.starts_with? '!'}
        ignored_columns = select_values.map {|select_value| select_value[1..-1]}
        reflection.klass.column_names.collect {|column_name| join_table[column_name].as "#{association_name}_#{column_name}" unless ignored_columns.include? column_name}.compact
      else
        select_values.collect {|column_name| join_table[column_name].as "#{association_name}_#{column_name}"}
      end
    end
  end
end
-
# Add methods to Enumerable, which makes them available to Array
-
1
# Statistical helpers mixed into Array (mean, median, mode, variance, etc.).
class Array
  # Arithmetic mean as a Float (NaN for an empty array, matching 0/0.0).
  def mean
    sum.fdiv(length)
  end

  # Median value; for even-sized arrays, the mean of the two middle elements.
  def median
    midpoint = empty? ? 0 : (length - 1) / 2.0
    sort[midpoint.floor..midpoint.ceil].mean
  end

  # Count of each distinct element; missing keys read as 0.
  def frequencies
    counts = Hash.new(0)
    each {|element| counts[element] += 1}
    counts
  end

  # All elements tied for the highest frequency, sorted ascending.
  def mode
    counts = frequencies
    top = counts.values.max
    counts.keys.select {|value| counts[value] == top}.sort
  end

  # Unbiased sample variance (n - 1 denominator); 0.0 for fewer than two elements.
  def sample_variance
    return 0.0 unless count > 1
    avg = mean
    squared_deviations = reduce(0) {|accum, value| accum + (value - avg) ** 2}
    squared_deviations / (count - 1).to_f
  end

  # Sample standard deviation (square root of the sample variance).
  def standard_deviation
    Math.sqrt(sample_variance)
  end

  # Moving average over windows of `span` consecutive elements.
  def moving_average(span = 1)
    each_cons(span).map {|window| window.mean}
  end
end
-
1
# Extends Date with parsing of strings containing French, German, Italian or
# Spanish month names by translating the month to English before Date.parse.
class Date
  # Parses a date string whose month name may be in a supported non-English
  # language; falls back to plain Date.parse behavior otherwise.
  def self.parse_international(string)
    parse(month_to_english(string))
  end

  # Maps each localized month name to the English Date::MONTHNAMES entry
  # (MONTHNAMES is 1-indexed with a leading nil, hence the [nil] prepend).
  def self.make_hash(names)
    Hash[([nil]+names).zip(MONTHNAMES)]
  end

  MONTH_TRANSLATIONS = {}
  MONTH_TRANSLATIONS.merge! make_hash(%w/janvier février mars avril mai juin juillet août septembre octobre novembre décembre/) # French
  MONTH_TRANSLATIONS.merge! make_hash(%w/januar februar märz april mai juni juli august september oktober november dezember/) # German
  MONTH_TRANSLATIONS.merge! make_hash(%w/gennaio febbraio marzo aprile maggio giugno luglio agosto settembre ottobre novembre dicembre/) # Italian
  MONTH_TRANSLATIONS.merge! make_hash(%w/enero febrero marzo abril mayo junio julio agosto septiembre octubre noviembre diciembre/) # Spanish
  MONTH_TRANSLATIONS.freeze

  # Replaces the first month-name-looking token with its English translation,
  # or returns the string unchanged when no translation is known.
  def self.month_to_english(string)
    month_from = string[/[^\s\d,]+/i] # Search for a month name
    if month_from
      month_to = MONTH_TRANSLATIONS[month_from.downcase] # Look up the translation
      return string.sub(month_from, month_to.to_s) if month_to
    end
    return string
  end

  # BUGFIX: a bare `private` has no effect on singleton (def self.) methods;
  # private_class_method is required to actually hide the helpers.
  private_class_method :make_hash, :month_to_english
end
-
1
module HerokuMongoBackup
  # Restores a gzipped backup (as written by Backup#store) from backup_location,
  # a URL or path readable via Kernel#open. The archive is a stream of
  # Marshal-dumped {collection_name => [documents]} hashes separated by '!-!'.
  # Documents are inserted into "<collection>-restored" so live data is never
  # clobbered.
  # NOTE(review): SSL verification is disabled on the download and Marshal.load
  # runs on the file contents — only restore from trusted backup locations.
  def self.load_from_file(backup_location, block_size = 1250)
    count = 1

    # Download the archive to a local file named after the remote basename.
    open("#{File.basename(backup_location)}", 'wb') do |file|
      file << open(backup_location,:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE).read
    end

    # Iterate through the gzipped backup yielding records by iterating per separator !-!
    Zlib::GzipReader.open("#{File.basename(backup_location)}").each('!-!') do |record|
      # Unmarshal the string
      record_block = Marshal.load(record)
      collection = record_block.keys.first

      Rails.logger.tagged('backup') {Rails.logger.info "Restoring from #{collection}: block #{(count-1)*block_size} - #{count*block_size}"}

      record_block[collection].each_slice(block_size) do |slice|
        # append -restored to any collection you insert to so you don't clobber the existing data if it still exists
        $mongodb.collection(collection+'-restored').insert slice
      end

      count += 1
    end
  end

  # Dumps mongo collections to a gzipped archive and uploads it to S3.
  class Backup
    # Writes each collection to the gzip stream as Marshal-dumped
    # {name => [docs]} blocks separated by '!-!' (the format load_from_file reads).
    def store(collections, block_size)
      file = File.new(@file_name, 'wb')
      file = Zlib::GzipWriter.new(file)

      collections.each do |col|
        Rails.logger.tagged('backup') {Rails.logger.info "Backing up collection: #{col.name}"}

        # timeout: false keeps the server-side cursor alive for large collections.
        col.find({}, timeout: false) do |cursor|
          total = col.count
          i = 1
          cursor.each_slice(block_size) do |slice|
            Rails.logger.tagged('backup') {Rails.logger.info "#{i*block_size} out of #{total}"}
            i += 1
            backup = {}

            # Add each block of records to a hash associated with the key of the collection
            backup[col.name] = slice

            file.print Marshal.dump(backup)
            file.print '!-!'
          end
        end
      end

      # cleanup file
      file.close
    end

    # Uses the app-wide mongo connection.
    def db_connect
      @db = $mongodb
    end

    # date    - the date the backed-up data belongs to (used for the file name)
    # connect - set false to skip the S3 connection (e.g. in tests)
    def initialize date = Date.current, connect = true
      @date = date
      # date-of-data--date-created|time-created.gz
      # 2014-02-11--2014-02-14|12:16:19.gz
      # NOTE(review): the strftime format contains no % directives, so the file
      # name is effectively just "#{@date}.gz"; the comment above describes an
      # older naming scheme — confirm which is intended.
      @file_name = Time.current.strftime("#{@date.to_s}.gz")
      @dirname = 'backups'

      self.db_connect
      self.s3_connect if connect
    end

    # Dumps the given collections and pushes the archive to S3.
    def backup(collections, block_size = 1000)
      #self.chdir
      self.store(collections, block_size)
      self.s3_upload
    end

    def s3_upload
      HerokuMongoBackup::s3_upload(@bucket, @dirname, @file_name)
    end

    # Opens the S3 bucket using the CarrierWave fog credentials for this environment.
    def s3_connect
      @bucket = HerokuMongoBackup::s3_connect(AmazeBot.config[:carrier_wave][:fog_directory][Utilities.env],
                                              AmazeBot.config[:carrier_wave][:fog_credentials][:aws_access_key_id],
                                              AmazeBot.config[:carrier_wave][:fog_credentials][:aws_secret_access_key])
    end
  end
end
-
1
# Scheduling helpers monkey-patched onto rufus-scheduler. Times are converted
# into the application's time zone and expressed as cron lines (with the zone
# name appended, which rufus-scheduler supports).
class Rufus::Scheduler::SchedulerCore
  # Runs the block once per day at start_time (interpreted in the app zone).
  def every_day(start_time)
    time = start_time.in_time_zone
    cron "#{time.min} #{time.hour} * * * #{ActiveSupport::TimeZone::MAPPING[Time.zone.name]}" do
      yield
    end
  end

  # Runs the block once per week on the given weekday (symbol or string,
  # e.g. :monday) at start_time.
  def every_week(weekday, start_time)
    time = start_time.in_time_zone
    weekday_to_cron_number = {sunday: 0, monday: 1, tuesday: 2, wednesday: 3, thursday: 4, friday: 5, saturday: 6}.with_indifferent_access
    cron "#{time.min} #{time.hour} * * #{weekday_to_cron_number[weekday]} #{ActiveSupport::TimeZone::MAPPING[Time.zone.name]}" do
      yield
    end
  end

  # Runs the block every `interval`, but only within the daily window
  # [start_time, end_time]: an interval job is started at start_time and
  # unscheduled at end_time each day. If "now" already falls inside today's
  # window — or inside a window that began yesterday and spills into today —
  # the interval job is started immediately.
  def schedule_job_for_time_period_every(interval, start_time, end_time)
    start_time_with_zone = start_time.in_time_zone
    end_time_with_zone = end_time.in_time_zone
    j = nil

    time_today = Time.current
    duration = end_time_with_zone - start_time_with_zone
    # Today's occurrence of the window's start (time-of-day only).
    start_time_today = Time.zone.parse(start_time_with_zone.strftime('%H:%M:%S %Z'))

    # if current time is in an interval starting today OR current time is in an interval starting yesterday and ending today
    if (start_time_today..start_time_today + duration).cover?(time_today) || (start_time_today - 24.hours..start_time_today - (24.hours - duration)).cover?(time_today)
      j = every interval do
        yield
      end
    end

    # Start the interval job at each day's window start...
    every_day start_time_with_zone do
      j = every interval do
        yield
      end
    end

    # ...and tear it down at the window end (try: j may be nil before the first start).
    every_day end_time_with_zone do
      j.try(:unschedule)
    end
  end
end
-
1
module Sidekiq
-
-
1
class Client
  # Staging queues are simply the production queue name suffixed with "_staging".
  def self.convert_to_staging_queue(queue)
    "#{queue}_staging"
  end

  # Like push_bulk, but redirects every job onto the staging variant of the
  # normalized queue. Returns the number of payloads pushed, or nil when there
  # was nothing to push.
  def self.push_bulk_staged(items)
    normalized = normalize_item(items)
    normalized['queue'] = convert_to_staging_queue normalized['queue']

    payloads = items['args'].collect do |args|
      raise ArgumentError, 'Bulk arguments must be an Array of Arrays: [[1], [2]]' unless args.is_a?(Array)
      process_single(items['class'], normalized.merge('args' => args, 'jid' => SecureRandom.hex(12), 'enqueued_at' => Time.now.to_f))
    end.compact

    return nil if payloads.empty?
    raw_push(payloads) ? payloads.size : nil
  end
end
-
-
1
module Worker
  # if socks error from tor client, just requeue the job and skip the rest by passing false
  # if throttled, requeue the job, sleep for desired time (if using the proxy) until you want to retry on this port
  # or if using the instance IP then set the dyno as throttled so future jobs know to immediately use the proxy
  #
  # Returns true when the page is OK to process; false when the job was
  # requeued (socks error / captcha / 503 throttle) or the page is dead.
  def handle_captcha(page, sleep_time, *args)
    if page.socks_error?
      self.class.perform_async *args
      false
    elsif (page.ok? && page.captcha?) || page.response_code == '503'
      requeue_and_throttle(sleep_time, *args)
    else
      page.ok?
    end
  end

  # BN Specific Version
  # if socks error from tor client, just requeue the job and skip the rest by passing false
  # if throttled (previously 429, seems to be 500 now), requeue the job, sleep for desired time (if using the proxy) until you want to retry on this port
  # or if using the instance IP then set the dyno as throttled so future jobs know to immediately use the proxy
  def bn_captcha_sleepy_time(page, sleep_time, *args)
    if page.socks_error?
      self.class.perform_async *args
      false
    elsif page.net_persistent_error? || page.response_code == '429' || page.response_code == '500'
      requeue_and_throttle(sleep_time, *args)
    else
      page.ok?
    end
  end

  private

  # Shared throttle handling (was duplicated verbatim in both methods above):
  # requeue this job, then either sleep the current proxy port or mark the
  # dyno as throttled. Always returns false so callers skip the rest of the job.
  def requeue_and_throttle(sleep_time, *args)
    self.class.perform_async *args

    if ProxyUtilities.using_proxy?
      sleep(sleep_time)
    else
      RedisUtilities.set_dyno_throttled
    end

    false
  end
end
-
end
-
1
require "net/http"
-
-
1
module HttpHelper
-
1
extend self
-
-
1
def get_html(url)
-
2
Nokogiri::HTML(fetch(url))
-
rescue *HTTP_ERRORS
-
1
nil
-
end
-
-
1
def get_json(url)
-
9
JSON.parse(fetch(url))
-
rescue JSON::ParserError, *HTTP_ERRORS => e
-
1
nil
-
end
-
-
1
def get_bn_search_page_html(url)
-
1
get_html_with_mechanize url, 'Mac FireFox'
-
end
-
-
1
def get_bn_list_stat_page_html(url)
-
10
get_html_with_mechanize url, 'Windows IE 6'
-
end
-
-
1
def get_bn_category_page_html(url, user_agent)
-
3
count = 0
-
3
valid_page = nil
-
-
# Loop 5 times on each page to get a result otherwise give up
-
3
while count < 5 do
-
8
html = get_html_with_mechanize url, user_agent
-
8
unless ScraperUtilities.bn_no_results? html
-
2
valid_page = html
-
2
break
-
end
-
6
count += 1
-
end
-
-
3
valid_page
-
end
-
-
1
def get_html_with_mechanize_no_rescue(url, user_agent_string)
-
187
agent = Mechanize.new
-
187
agent.user_agent_alias = user_agent_string
-
187
agent.idle_timeout = 5 if url.include? 'barnesandnoble.com'
-
187
page_or_file = agent.get(url)
-
#TODO: Convert all uses to Mechanize::Page, seems to be what is happening anyway?
-
186
page_or_file.class == Mechanize::Page ? page_or_file.parser : Nokogiri::HTML(page_or_file.body)
-
end
-
-
1
def get_html_with_mechanize(url, user_agent_string)
-
13
get_html_with_mechanize_no_rescue url, user_agent_string
-
rescue *HTTP_ERRORS => e
-
if e.class == Mechanize::ResponseCodeError
-
error = {error_class: Mechanize::ResponseCodeError.to_s, code: e.response_code}
-
else
-
error = {error_class: e.class.to_s}
-
end
-
Utilities.log('http_error', error)
-
{}
-
end
-
-
1
def get_canonical_amazon_url(asin, tld)
-
4
uri_str = Urls.amazon_book_page(asin, tld)
-
4
response = Net::HTTP.get_response(URI.parse(URI.encode(uri_str)))
-
3
case response
-
when Net::HTTPSuccess then
-
2
Rails.logger.tagged('cleanup') {Rails.logger.info "Basic URL #{uri_str} is valid no redirect"}
-
1
uri_str
-
when Net::HTTPRedirection then
-
1
if response['location'].present?
-
2
Rails.logger.tagged('cleanup') {Rails.logger.info "Basic URL redirects to: #{response['location']}"}
-
1
response['location']
-
end
-
else
-
nil
-
end
-
rescue *HTTP_ERRORS
-
1
nil
-
end
-
-
1
def get_canonical_bn_url(isbn13, tld)
-
5
return nil unless isbn13.present? && tld == '.com'
-
-
3
agent = Mechanize.new
-
3
agent.user_agent_alias='Windows IE 6'
-
3
agent.get(Urls.bn_book_page(isbn13))
-
2
canonical_url = agent.history.last.uri.to_s
-
2
canonical_url if canonical_url.exclude?('noresults')
-
rescue *HTTP_ERRORS
-
1
nil
-
end
-
-
1
# Resolves the canonical Goodreads book URL for ISBN/key +key+ with a
# single GET, honoring at most one redirect.
# Returns nil when the page is unreachable, the redirect Location is
# blank, or the redirect points at the bare /book page (which Goodreads
# uses to signal "no such book").
def get_canonical_goodread_url(key)
  candidate = Urls.goodreads_book_page(key)
  response = Net::HTTP.get_response(URI.parse(URI.encode(candidate)))
  redirect_target = response['location']

  if response.is_a?(Net::HTTPSuccess)
    Rails.logger.tagged('cleanup') {Rails.logger.info "Basic URL #{candidate} is valid no redirect"}
    candidate
  elsif response.is_a?(Net::HTTPRedirection)
    # 'http://www.goodreads.com/book' is not a valid url, this means there is no page for this key
    if redirect_target.present? && redirect_target != 'http://www.goodreads.com/book'
      Rails.logger.tagged('cleanup') {Rails.logger.info "Basic URL redirects to: #{redirect_target}"}
      redirect_target
    end
  end
rescue *HTTP_ERRORS
  nil
end
-
-
1
private
-
-
1
# Fetches +uri_str+ and returns the response body re-encoded to UTF-8,
# following up to +limit+ redirects (recursively).
#
# Raises ArgumentError when the redirect budget is exhausted or a
# redirect carries no Location header; other non-success responses raise
# via Net::HTTPResponse#value.
def fetch(uri_str, limit = 5)
  # You should choose a better exception.
  # <= (was ==) so a caller passing a negative limit cannot bypass the
  # redirect cap and recurse indefinitely.
  raise ArgumentError, 'too many HTTP redirects' if limit <= 0

  response = Net::HTTP.get_response(URI.parse(URI.encode(uri_str)))

  case response
  when Net::HTTPSuccess then
    body = response.body
    # BUG FIX: String#encoding returns an Encoding object, so the
    # original comparison against the string 'UTF-8' was always true and
    # re-encoded every body, including ones already in UTF-8.
    if body.encoding != Encoding::UTF_8
      original_encoding = body.encoding
      begin
        body = body.encode 'UTF-8'
      rescue Encoding::UndefinedConversionError, Encoding::ConverterNotFoundError
        # The declared encoding could not convert; ISO-8859-1 maps every
        # byte, so retry with it as a best-effort intermediate.
        body.force_encoding 'ISO-8859-1'
        begin
          body = body.encode 'UTF-8'
        rescue Encoding::UndefinedConversionError, Encoding::ConverterNotFoundError
          # Give up and restore the original label unchanged.
          body.force_encoding original_encoding
        end
      end
    end
    body
  when Net::HTTPRedirection then
    response['location'].present? ? fetch(response['location'], limit - 1) : (raise ArgumentError, 'Blank redirect!')
  else
    # Raises an exception appropriate to the response class (e.g. 4xx/5xx).
    response.value
  end
end
-
end
-
1
# Coder that serializes objects to/from JSON text columns.
# Appears designed as an ActiveRecord `serialize` coder (dump/load duck
# type) — confirm against call sites.  NULL columns load as the value
# produced by the configured default block; decoded hashes gain
# indifferent access.
class JsonColumn
  # Builds a coder whose +load+ yields a fresh result of +block+ for
  # NULL column values.
  def self.default_with(&block)
    new(block)
  end

  def initialize(default)
    @default = default
  end

  # Serializes +obj+ to a JSON string; nil passes through as nil (SQL NULL).
  def dump(obj)
    return if obj.nil?
    MultiJson.dump(obj)
  end

  # Deserializes the stored JSON string; nil (NULL) yields the default.
  def load(json)
    if json.nil?
      @default.call
    else
      decoded = MultiJson.load(json)
      decoded.is_a?(Hash) ? decoded.with_indifferent_access : decoded
    end
  end
end
-
1
require 'sidekiq'
-
-
1
# Autoscales Heroku dynos against Sidekiq queue backlog.
# Stateless utility module (extend self); requires HEROKU_API_KEY and
# HEROKU_APP in the environment to do anything.
module HerokuScaler
  extend self

  # When truthy, the verbose per-run diagnostics below are logged.
  @conditionally_log = nil

  # Re-reads the 'conditional_log' feature flag; call before scale! to
  # toggle diagnostic logging for the run.
  def set_conditionally_log
    @conditionally_log = Utilities.is_flag_set? 'conditional_log'
  end

  # Walks +configurations+ (each a hash read for :name, :queues,
  # :concurrency, :minimum_count, :maximum_count) and scales each named
  # process type up or down via the Heroku Platform API.
  # No-op unless both HEROKU_API_KEY and HEROKU_APP are set.
  def scale!(configurations = [])
    if ENV['HEROKU_API_KEY'] && ENV['HEROKU_APP']
      heroku = PlatformAPI.connect(ENV['HEROKU_API_KEY'])

      process_counts = get_process_counts(heroku)
      sidekiq_job_counts = get_sidekiq_job_counts
      Rails.logger.tagged('scaler') {Rails.logger.info "process_counts: #{process_counts}, sidekiq_job_counts: #{sidekiq_job_counts}"} if @conditionally_log
      configurations.each do |config|
        process_count = process_counts[config[:name]]
        total_job_count = config[:queues].sum {|queue| sidekiq_job_counts[queue]}
        needed_count = process_count_needed(total_job_count, config[:concurrency], config[:maximum_count])

        # Scale up only 50 max per 30 seconds
        # (step size is capped at 50 dynos per invocation in BOTH
        # directions; presumably scale! runs on a ~30s clock — confirm
        # against the scheduler that invokes it)
        if process_count > config[:minimum_count] && total_job_count == 0
          # Queues are drained: step down toward the configured floor.
          new_count = [process_count - 50, config[:minimum_count]].max
          Rails.logger.tagged('scaler') {Rails.logger.info "Scaling #{config[:name]} down to #{new_count} from #{process_count}"}
          scale_process! config[:name], new_count, heroku
        elsif needed_count > process_count
          # Backlog exceeds capacity: step up toward the computed need
          # (itself capped at :maximum_count by process_count_needed).
          new_count = [process_count + 50, needed_count].min
          Rails.logger.tagged('scaler') {Rails.logger.info "Scaling #{config[:name]} up to #{new_count}, current count:#{process_count} total_job_count:#{total_job_count}"}
          scale_process! config[:name], new_count, heroku
        end

        Rails.logger.tagged('scaler') {Rails.logger.info "process_count: #{process_count}, total_job_count: #{total_job_count}, if condition: #{process_count > config[:minimum_count] && total_job_count == 0}, elsif condition: #{process_count < process_count_needed(total_job_count, config[:concurrency], config[:maximum_count])}, config: #{config}"} if @conditionally_log
      end
    end
  end

  # Returns a HashWithIndifferentAccess (default 0) of process type =>
  # current dyno quantity from the Heroku formation API.  Retries once
  # on Excon errors.
  # NOTE(review): on the second failure nothing is returned (the rescue
  # falls through), so callers receive nil and would then index into it
  # — confirm this path is acceptable.
  def get_process_counts(heroku = PlatformAPI.connect(ENV['HEROKU_API_KEY']), first_try = true)
    process_counts = heroku.formation.list(ENV['HEROKU_APP']).each_with_object(HashWithIndifferentAccess.new(0)) do |process_details, hash|
      hash[process_details['type']] = process_details['quantity']
    end
    Rails.logger.tagged('scaler') {Rails.logger.info process_counts.inspect} if @conditionally_log
    process_counts
  rescue Excon::Errors::Error => e
    if first_try
      Rails.logger.tagged('scaler') {Rails.logger.info "Retrying process counts because of error: #{e}"} if @conditionally_log
      get_process_counts(heroku, false)
    else
      Rails.logger.tagged('scaler') {Rails.logger.info "Not retrying process counts, second error: #{e}"} if @conditionally_log
    end
  end

  # Returns queue name => job count (HashWithIndifferentAccess, default
  # 0), counting enqueued jobs plus jobs currently being worked.
  # NOTE(review): reads the 'workers' Redis set, which looks like the
  # pre-Sidekiq-3 layout — confirm against the pinned Sidekiq version.
  def get_sidekiq_job_counts
    counts = Sidekiq::Queue.all.each_with_object(HashWithIndifferentAccess.new(0)) {|queue, hash| hash[queue.name] = queue.size}
    Sidekiq.redis do |conn|
      conn.smembers('workers').each do |w|
        msg = conn.get("worker:#{w}")
        # In-flight jobs still count toward the queue's backlog.
        counts[Sidekiq.load_json(msg)['queue']] += 1 if msg.present?
      end
    end

    counts
  end

  # Dynos needed to drain +pending_job_count+ at +concurrency+ jobs per
  # dyno, capped at +maximum_count+.
  def process_count_needed(pending_job_count, concurrency, maximum_count)
    [(pending_job_count.to_f / concurrency).ceil, maximum_count].min
  end

  # Sets the dyno quantity for +process_name+.  Retries once on Excon
  # errors, then gives up silently (logged only when logging is enabled).
  def scale_process!(process_name, quantity, heroku = PlatformAPI.connect(ENV['HEROKU_API_KEY']), first_try = true)
    heroku.formation.update(ENV['HEROKU_APP'], process_name, {'quantity' => quantity})
  rescue Excon::Errors::Error => e
    if first_try
      Rails.logger.tagged('scaler') {Rails.logger.info "Retrying scale because of error: #{e}"} if @conditionally_log
      scale_process!(process_name, quantity, heroku, false)
    else
      Rails.logger.tagged('scaler') {Rails.logger.info "Not retrying scale, second error: #{e}"} if @conditionally_log
    end
  end
end
-
1
require 'json'
-
-
1
# Minimal builder for an X-SMTPAPI-style mail header: accumulates
# recipients and renders them as JSON.
class SmtpApiHeader
  def initialize(to = nil)
    @data = {}
    add_to to if to.present?
  end

  # Appends a single address or an array of addresses to the 'to' list.
  def add_to(to)
    recipients = to.kind_of?(Array) ? to : [to]
    @data['to'] = (@data['to'] || []) + recipients
  end

  # Renders the header data as JSON with a space inserted after ':' and
  # ',' separators that sit between quoted/bracketed tokens.
  # NOTE(review): the spacing looks like a mail-header folding
  # workaround — confirm against the SMTP API provider's docs.
  def as_json
    compact_json = JSON.generate @data
    compact_json.gsub(/(["\]}])([,:])(["\[{])/, '\\1\\2 \\3')
  end
end
-
1
# URL builders for the external sites the scrapers and cleanup jobs hit
# (Amazon, Barnes & Noble, Apple, Goodreads).
module Urls
  extend self

  # Best-seller category listing on Amazon.  +category_id+ may be blank
  # for the root list; +base_category+ 'Books' selects print books,
  # anything else the Kindle ('digital-text') store.
  # (Rewrote the original's nested string interpolation / repeated +=
  # into named segments; output is byte-identical.)
  def amazon_book_category_page(category_id, tld, base_category, page_number)
    store = base_category == 'Books' ? 'books' : 'digital-text'
    category_segment = category_id.present? ? "#{category_id}/" : ''
    ref_segment = category_id.present? ? "#{category_id}_" : ''

    "http://www.amazon#{tld}/Best-Sellers/zgbs/#{store}/#{category_segment}" \
      "ref=zg_bs_#{ref_segment}pg_#{page_number}?_encoding=UTF8&pg=#{page_number}"
  end

  def amazon_book_page(asin, tld)
    "http://www.amazon#{tld}/gp/product/#{asin}"
  end

  def amazon_author_page(author_asin, tld)
    "http://www.amazon#{tld}/a/e/#{author_asin}"
  end

  # Search URL for an ISBN-10 or ASIN.  A value containing any non-digit
  # is treated as an ASIN.
  def amazon_search_page(isbn_or_asin, tld)
    # field-asin parameter only works with search-alias parameter set
    if isbn_or_asin.scan(/\D/).present?
      "http://www.amazon#{tld}/gp/search/?search-alias=stripbooks&field-asin=#{isbn_or_asin}"
    else
      "http://www.amazon#{tld}/gp/search/?&field-isbn=#{isbn_or_asin}"
    end
  end

  # Related-format expansion data for a parent ASIN.  +count+ defaults to
  # the previous hard-coded maximum of 10 (the original comment asked for
  # this to be configurable; existing callers are unaffected).
  def amazon_related_format_url(parent_asin, dom_format, tld, count = 10)
    "http://www.amazon#{tld}/gp/media-matrix/fetch-expansion-data.html?metaBinding=#{dom_format}_meta_binding&productCategory=books&parentAsin=#{parent_asin}&startIndex=1&count=#{count}"
  end

  # Apple RSS feed of the top 200 ebooks in a genre.
  # Raises NotImplementedError unless +type+ is 'paid' or 'free'.
  def apple_book_category_feed(category_id, type)
    raise NotImplementedError unless %w[paid free].include? type.to_s

    "https://itunes.apple.com/us/rss/top#{type}ebooks/limit=200/genre=#{category_id}/xml"
  end

  def bn_book_page(ean)
    "http://www.barnesandnoble.com/w/a/?ean=#{ean}"
  end

  def bn_search_page(isbn13)
    "http://www.barnesandnoble.com/s/#{isbn13}"
  end

  # Escape the string and append a search param to get specific book
  # types in search results.  Colons are stripped from the title because
  # B&N search treats them specially.
  def bn_search_page_by_details(title, author_name, book_format)
    search_param = if book_format.include? 'Paperback'
                     '?aref=1521'
                   elsif book_format.include? 'Hardcover'
                     '?aref=1519'
                   elsif book_format.include? 'Kindle'
                     '?dref=2207'
                   else
                     '?dref=1'
                   end

    "http://www.barnesandnoble.com/s/#{CGI.escape("#{title.gsub(':','')} #{author_name}")}#{search_param}&view=list"
  end

  # Category listing on B&N; blank parts are simply omitted from the
  # query string.
  def bn_category_page(category_id, book_format, page_number)
    # Hoisted: the original called Utilities.bn_format_code twice.
    format_code = Utilities.bn_format_code(book_format)
    url_params = [("aref=#{format_code}" if format_code.present?),
                  "dref=#{category_id}",
                  # 90 results per page, 1-based start offset.
                  ("size=90&startat=#{(page_number - 1) * 90 + 1}" if page_number.present?),
                  ('view=grid' if page_number.present? && book_format.present?)]

    "http://www.barnesandnoble.com/s/?#{url_params.compact.join('&')}"
  end

  def goodreads_book_page(key)
    "http://www.goodreads.com/book/isbn/#{key}"
  end
end