parsley/lib/usda_importer.rb
2016-01-28 18:18:45 -06:00

155 lines
3.0 KiB
Ruby

require 'csv'
# Imports USDA food data from a directory containing the caret-delimited
# `FOOD_DES.txt` and `ABBREV.txt` files into UsdaFood records, then re-attaches
# the refreshed records to every Ingredient that carries an `ndbn`.
#
# Usage:
#   UsdaImporter.new('/path/to/usda/files').import
class UsdaImporter
  # Column order of ABBREV.txt; the file has no header row, so these names are
  # supplied to the CSV parser as headers.
  ABBREV_COLUMNS = %w[
    NDB_No Shrt_Desc Water Energ_Kcal Protein Lipid_Tot Ash Carbohydrt
    Fiber_TD Sugar_Tot Calcium Iron Magnesium Phosphorus Potassium Sodium
    Zinc Copper Manganese Selenium Vit_C Thiamin Riboflavin Niacin
    Panto_acid Vit_B6 Folate_Tot Folic_acid Food_Folate Folate_DFE
    Choline_Tot Vit_B12 Vit_A_IU Vit_A_RAE Retinol Alpha_Carot Beta_Carot
    Beta_Crypt Lycopene Lut+Zea Vit_E Vit_D_mcg Vit_D_IU Vit_K FA_Sat
    FA_Mono FA_Poly Cholestrl GmWt_1 GmWt_Desc1 GmWt_2 GmWt_Desc2 Refuse_Pct
  ].freeze

  # Column order of FOOD_DES.txt (also header-less).
  FOOD_DATA_COLUMNS = %w[
    NDB_No FdGrp_Cd Long_Desc Shrt_Desc ComName ManufacName Survey Ref_desc
    Refuse SciName N_Factor Pro_Factor Fat_Factor CHO_Factor
  ].freeze

  # UsdaFood attribute name => ABBREV.txt column it is read from.
  ABBREV_COLUMN_MAP = {
    ndbn: 'NDB_No',
    short_description: 'Shrt_Desc',
    water: 'Water',
    kcal: 'Energ_Kcal',
    protein: 'Protein',
    lipid: 'Lipid_Tot',
    ash: 'Ash',
    carbohydrates: 'Carbohydrt',
    fiber: 'Fiber_TD',
    sugar: 'Sugar_Tot',
    gram_weight_1: 'GmWt_1',
    gram_weight_2: 'GmWt_2',
    gram_weight_desc_1: 'GmWt_Desc1',
    gram_weight_desc_2: 'GmWt_Desc2',
    refuse_percent: 'Refuse_Pct'
  }.freeze

  # UsdaFood attribute name => FOOD_DES.txt column it is read from.
  FOOD_DATA_COLUMN_MAP = {
    scientific_name: 'SciName',
    refuse_description: 'Ref_desc',
    long_description: 'Long_Desc'
  }.freeze

  # File mode for both data files: read ISO-8859-1 bytes, transcode to UTF-8.
  READ_MODE = 'r:iso-8859-1:utf-8'

  # Number of ABBREV.txt rows created per UsdaFood transaction.
  BATCH_SIZE = 500

  # @param directory [String] directory holding FOOD_DES.txt and ABBREV.txt
  def initialize(directory)
    @directory = directory
  end

  # Replaces all UsdaFood records with the contents of the data files and
  # re-links ingredients to the newly created records.
  #
  # NOTE(review): `delete_all` runs outside the per-batch transactions, so a
  # failure part-way through leaves the table partially populated — confirm
  # that is acceptable for callers.
  #
  # @return [void]
  def import
    UsdaFood.delete_all
    import_foods(load_food_descriptions)
    relink_ingredients
  end

  # Parser options shared by both data files: fields are separated by `^` and
  # text fields are wrapped in `~`.
  #
  # @param headers [Array<String>] column names to assign to each row
  # @return [Hash] options suitable for splatting into CSV.open
  def csv_options(headers)
    { col_sep: '^', quote_char: '~', headers: headers }
  end

  private

  # Reads FOOD_DES.txt into a Hash of NDB number => row Hash.
  def load_food_descriptions
    descriptions = {}
    open_data_file('FOOD_DES.txt', FOOD_DATA_COLUMNS) do |csv|
      csv.each { |row| descriptions[row['NDB_No']] = row.to_h }
    end
    descriptions
  end

  # Creates UsdaFood records from ABBREV.txt, BATCH_SIZE rows per transaction,
  # merging in the FOOD_DES.txt fields when a description exists for the row.
  def import_foods(descriptions)
    open_data_file('ABBREV.txt', ABBREV_COLUMNS) do |csv|
      csv.each_slice(BATCH_SIZE) do |rows|
        UsdaFood.transaction do
          UsdaFood.create(rows.map { |row| food_attributes(row, descriptions) })
        end
      end
    end
  end

  # Builds the attribute Hash for one ABBREV.txt row.
  def food_attributes(row, descriptions)
    attrs = pick(row, ABBREV_COLUMN_MAP)
    description = descriptions[attrs[:ndbn]]
    attrs.merge!(pick(description, FOOD_DATA_COLUMN_MAP)) if description
    attrs
  end

  # Re-attaches the freshly imported UsdaFood to every ingredient whose ndbn
  # matches one; ingredients without a match are left untouched.
  def relink_ingredients
    foods_by_ndbn = UsdaFood.where(ndbn: Ingredient.select(:ndbn))
                            .each_with_object({}) { |food, index| index[food.ndbn] = food }

    Ingredient.where('ndbn IS NOT NULL').each do |ingredient|
      food = foods_by_ndbn[ingredient.ndbn]
      next unless food

      ingredient.set_usda_food(food)
      ingredient.save
    end
  end

  # Opens one of the data files with the shared encoding and parser options.
  # The options are splatted because CSV.open takes them as keyword arguments;
  # passing the Hash positionally raises ArgumentError on Ruby 3.0+.
  def open_data_file(name, headers, &block)
    CSV.open(File.join(@directory, name), READ_MODE, **csv_options(headers), &block)
  end

  # Maps `source[column]` onto each attribute name in `mapping`.
  def pick(source, mapping)
    mapping.each_with_object({}) { |(attribute, column), attrs| attrs[attribute] = source[column] }
  end
end