require 'csv'

# Loads food records from two caret-delimited, tilde-quoted text files
# (FOOD_DES.txt and ABBREV.txt) found in a directory, replaces every existing
# UsdaFood row with them, and then re-attaches each Ingredient that carries an
# `ndbn` to its freshly created UsdaFood.
class UsdaImporter
  # Column order of ABBREV.txt. The file has no header row, so these names are
  # handed to CSV as the headers.
  ABBREV_COLUMNS = %w[
    NDB_No Shrt_Desc Water Energ_Kcal Protein Lipid_Tot Ash Carbohydrt
    Fiber_TD Sugar_Tot Calcium Iron Magnesium Phosphorus Potassium Sodium
    Zinc Copper Manganese Selenium Vit_C Thiamin Riboflavin Niacin
    Panto_acid Vit_B6 Folate_Tot Folic_acid Food_Folate Folate_DFE
    Choline_Tot Vit_B12 Vit_A_IU Vit_A_RAE Retinol Alpha_Carot Beta_Carot
    Beta_Crypt Lycopene Lut+Zea Vit_E Vit_D_mcg Vit_D_IU Vit_K FA_Sat
    FA_Mono FA_Poly Cholestrl GmWt_1 GmWt_Desc1 GmWt_2 GmWt_Desc2 Refuse_Pct
  ].freeze

  # Column order of FOOD_DES.txt (also read without a header row).
  FOOD_DATA_COLUMNS = %w[
    NDB_No FdGrp_Cd Long_Desc Shrt_Desc ComName ManufacName Survey Ref_desc
    Refuse SciName N_Factor Pro_Factor Fat_Factor CHO_Factor
  ].freeze

  # UsdaFood attribute name => ABBREV.txt column it is read from.
  ABBREV_COLUMN_MAP = {
    ndbn: 'NDB_No',
    short_description: 'Shrt_Desc',
    water: 'Water',
    kcal: 'Energ_Kcal',
    protein: 'Protein',
    lipid: 'Lipid_Tot',
    ash: 'Ash',
    carbohydrates: 'Carbohydrt',
    fiber: 'Fiber_TD',
    sugar: 'Sugar_Tot',
    gram_weight_1: 'GmWt_1',
    gram_weight_2: 'GmWt_2',
    gram_weight_desc_1: 'GmWt_Desc1',
    gram_weight_desc_2: 'GmWt_Desc2',
    refuse_percent: 'Refuse_Pct'
  }.freeze

  # UsdaFood attribute name => FOOD_DES.txt column it is read from.
  FOOD_DATA_COLUMN_MAP = {
    scientific_name: 'SciName',
    refuse_description: 'Ref_desc',
    long_description: 'Long_Desc'
  }.freeze

  # @param directory [String] directory containing FOOD_DES.txt and ABBREV.txt
  def initialize(directory)
    @directory = directory
  end

  # Deletes all UsdaFood rows, recreates them from the data files, and links
  # every Ingredient that has an `ndbn` to the UsdaFood with the same `ndbn`.
  #
  # @return [Object] whatever `Ingredient.where(...).each` returns
  def import
    UsdaFood.delete_all
    import_foods(load_food_descriptions)
    relink_ingredients
  end

  # CSV parsing options shared by both data files.
  #
  # @param headers [Array<String>] column names to assign, in file order
  # @return [Hash] options suitable for splatting into CSV.open
  def csv_options(headers)
    { col_sep: '^', quote_char: '~', headers: headers }
  end

  private

  # Opens one data file from the import directory and yields the CSV reader.
  # The file is read as ISO-8859-1 and transcoded to UTF-8.
  def open_data_file(filename, headers, &block)
    # CSV.open accepts its options as keywords; a positional Hash is rejected
    # with ArgumentError on Ruby 3+, so the options must be splatted.
    CSV.open(File.join(@directory, filename), 'r:iso-8859-1:utf-8', **csv_options(headers), &block)
  end

  # Reads FOOD_DES.txt into a Hash of NDB_No => { column name => value }.
  def load_food_descriptions
    open_data_file('FOOD_DES.txt', FOOD_DATA_COLUMNS) do |csv|
      csv.each_with_object({}) { |row, lookup| lookup[row['NDB_No']] = row.to_h }
    end
  end

  # Creates one UsdaFood per ABBREV.txt row, 500 rows per transaction.
  def import_foods(food_data_lookup)
    open_data_file('ABBREV.txt', ABBREV_COLUMNS) do |csv|
      csv.each_slice(500) do |rows|
        UsdaFood.transaction do
          UsdaFood.create(rows.map { |row| food_attributes(row, food_data_lookup) })
        end
      end
    end
  end

  # Builds the attribute Hash for one ABBREV.txt row. The FOOD_DES.txt fields
  # are added only when a description with the same NDB_No exists.
  def food_attributes(row, food_data_lookup)
    attrs = ABBREV_COLUMN_MAP.map { |attribute, column| [attribute, row[column]] }.to_h
    description = food_data_lookup[attrs[:ndbn]]
    return attrs unless description

    FOOD_DATA_COLUMN_MAP.each { |attribute, column| attrs[attribute] = description[column] }
    attrs
  end

  # Points every Ingredient with an `ndbn` at the matching new UsdaFood and
  # saves it. Ingredients whose `ndbn` has no matching food are left untouched.
  def relink_ingredients
    referenced_foods = UsdaFood.where(ndbn: Ingredient.select(:ndbn))
    foods_by_ndbn = referenced_foods.map { |food| [food.ndbn, food] }.to_h

    Ingredient.where('ndbn IS NOT NULL').each do |ingredient|
      food = foods_by_ndbn[ingredient.ndbn]
      next unless food

      ingredient.set_usda_food(food)
      ingredient.save
    end
  end
end