2016-03-15 61 views
0

我已经做了一些关于的阅读,我正在尝试实现它,但它不能像我所期望的那样工作。最初我的代码没有使用bulk_create,大约需要33秒钟才能导入6074行数据。慢,但它的工作。Django的bulk_create()

型号:

class Building(models.Model): 
    community = models.ForeignKey('Community', related_name='Building Community Name') 
    physical_location = models.CharField(max_length=80, null=True, blank=True) 
    data_source = models.CharField(max_length=50, null=True, blank=True) 
    facility_name = models.CharField(max_length=120, null=True, blank=True) 
    facility_type = models.CharField(max_length=80, null=True, blank=True) 
    size = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    audited = models.NullBooleanField(blank=True) 
    audit_notes = models.TextField(blank=True) 

class RetrofitData(models.Model): 
    building_id = models.ForeignKey('Building') 
    retrofits_done = models.NullBooleanField(blank=True) 
    retrofit_notes = models.TextField(blank=True) 
    fuel_oil_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    district_heating_oil_usage_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    electricity_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    natural_gas_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    propane_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    biomass_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    fuel_oil_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    district_heating_oil_usage_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    electricity_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    natural_gas_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    propane_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    biomass_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    retrofit_cost = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2) 
    biomass_heat = models.NullBooleanField(blank=True) 
    heat_recovery = models.NullBooleanField(blank=True) 

原始代码:

class BuildingInventoryImporter(dataimport.DataFileImporter): 

    def toTrueFalse(self, val): 
      if val == "Yes": 
       return True 
      elif val == "No": 
       return False 
      else: 
       return None 

    def decCleaner(self, val): 
     if val == '': 
      return None 
     else: 
      return val2dec(val) 

    models = [Building, RetrofitData] 

    @transaction.commit_manually 
    @rollback_on_exception 
    def do_import(self): 
     book = xlrd.open_workbook(self.data_file.file.path, 
      encoding_override='cp1252') 
     sheet = book.sheet_by_index(2) 

     for row_index in range(1,sheet.nrows): 
      row = sheet.row_values(row_index) 

      temp_id= row_index 
      community_name = row[0] 
      gnis = row[1] 
      physical_location = row[2] 
      data_source = row[3] 
      facility_type = row[5] 

      if row[4] == '': 
       if facility_type =="Other" or facility_type == "Office" or facility_type == "Public Assembly" or facility_type == "Public Safety": 
        facility_name = "Unavailable" 
       elif facility_type =="Health Care - Hospitals": 
        facility_name = community_name + " Clinic" 
       elif facility_type == "Education - K - 12": 
        facility_name = community_name + " School(s)" 
      else: 
       facility_name = row[4]  

      size = self.decCleaner(row[6]) 
      audited = self.toTrueFalse(row[7]) 
      audit_notes = row[8] 

      building, created = self.get_or_new(Building, id=temp_id) 

      try: 
       community = Community.objects.get(gnis_feature_id=gnis) 
      except Community.DoesNotExist: 
       self.warning("The value entered for the Community GNIS: {0} does not exist.".format(row[1])) 

       try: 
        community = Community.objects.get(name=community_name) 
        self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS").format(row[0]) 
       except Community.DoesNotExist: 
        self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(row[0], row[1])) 
        continue 

      building.community = community 
      building.physical_location = physical_location 
      building.data_source = data_source 
      building.facility_name = facility_name 
      building.facility_type = facility_type 
      building.size = size 
      building.audited = audited 
      building.audit_notes = audit_notes 
      building.save() 

      retrofit_data, created = self.get_or_new(RetrofitData, building_id=building) 

      retrofit_data.retrofits_done = self.toTrueFalse(row[9]) 
      retrofit_data.retrofit_notes = row[10] 
      retrofit_data.fuel_oil_preretrofit = self.decCleaner(row[11]) 

      if row[12] == 999999999: #They decided that a unknown value would be represented as 999999999 in the dataset. 
       retrofit_data.district_heating_oil_usage_preretrofit = None 
      else: 
       retrofit_data.district_heating_oil_usage_preretrofit = self.decCleaner(row[12]) 

      retrofit_data.electricity_preretrofit = self.decCleaner(row[13]) 
      retrofit_data.natural_gas_preretrofit = self.decCleaner(row[14]) 
      retrofit_data.propane_preretrofit = self.decCleaner(row[15]) 
      retrofit_data.biomass_preretrofit = self.decCleaner(row[16]) 
      retrofit_data.fuel_oil_postretrofit = self.decCleaner(row[17]) 
      retrofit_data.district_heating_oil_usage_postretrofit = self.decCleaner(row[18]) 
      retrofit_data.electricity_postretrofit = self.decCleaner(row[19]) 
      retrofit_data.natural_gas_postretrofit = self.decCleaner(row[20]) 
      retrofit_data.propane_postretrofit = self.decCleaner(row[21]) 
      retrofit_data.biomass_postretrofit = self.decCleaner(row[22]) 
      retrofit_data.retrofit_cost = self.decCleaner(row[23]) 
      retrofit_data.biomass_heat = self.toTrueFalse(row[24]) 
      retrofit_data.heat_recovery = self.toTrueFalse(row[25]) 
      retrofit_data.save() 

     if self.dry_run: 
      transaction.rollback() 
     else: 
      transaction.commit() 
dataimport.register(BuildingInventoryImporter) 

它必须在整个数据导入的过程中打如数据库〜1200倍,其导致进口缓慢。因此,要解决这个问题我看着使用bulk_create()

修改代码:当我到达的第二个代码块中批量导入RetroFitData

class BuildingInventoryImporterV2(dataimport.DataFileImporter): 

    models = [Building, RetrofitData] 

    def do_import(self, dry_run=True):  
     book = xlrd.open_workbook(self.data_file.file.path, 
      encoding_override='cp1252') 
      sheet = book.sheet_by_index(2) 

     building_bulk_list = [] 
     retrofit_bulk_list = [] 

     for row_index in range(1,sheet.nrows): 
      row = sheet.row_values(row_index) 

      temp_id= row_index 
      community_name = row[0] 
      gnis = row[1] 
      facility_type = row[5] 

      try: 
       community = Community.objects.get(gnis_feature_id=gnis) 
      except Community.DoesNotExist: 
       self.warning("The value entered for the Community GNIS: {0} does not exist.".format(row[1])) 

       try: 
        community = Community.objects.get(name=community_name) 
        self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS").format(row[0]) 
       except Community.DoesNotExist: 
        self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(row[0], row[1])) 
        continue 

      if row[4] == '': 
       if facility_type =="Other" or facility_type == "Office" or facility_type == "Public Assembly" or facility_type == "Public Safety": 
        facility_name = "Unavailable" 
       elif facility_type =="Health Care - Hospitals": 
        facility_name = community_name + " Clinic" 
       elif facility_type == "Education - K - 12": 
        facility_name = community_name + " School(s)" 
      else: 
       facility_name = row[4] 

      building_to_add = Building( 
       community=community,  
       physical_location=row[2],  
       data_source=row[3],  
       facility_name=facility_name,  
       facility_type=facility_type,  
       size=self.decCleaner(row[6]),  
       audited=self.toTrueFalse(row[7]),  
       audit_notes=row[8]  
      ) 
      building_bulk_list.append(building_to_add) 
     if self.dry_run is False: 
      Building.objects.bulk_create(building_bulk_list) 

     for row_index in range(1,sheet.nrows): 
      row = sheet.row_values(row_index) 
      #They decided that a unknown value would be represented as 999999999 in the dataset. 

      if row[12] == 999999999:  
       district_heating_oil_usage_preretrofit = None  
      else:  
       district_heating_oil_usage_preretrofit = self.decCleaner(row[12]) 

      retrofit_data_to_add = RetrofitData( 
       building_id=Building.objects.get(id=temp_id),  
       retrofits_done=self.toTrueFalse(row[9]),  
       retrofit_notes=row[10],  
       fuel_oil_preretrofit=self.decCleaner(row[11]),  
       district_heating_oil_usage_preretrofit=district_heating_oil_usage_preretrofit,  
       electricity_preretrofit=self.decCleaner(row[13]),  
       natural_gas_preretrofit=self.decCleaner(row[14]),  
       propane_preretrofit=self.decCleaner(row[15]),  
       biomass_preretrofit=self.decCleaner(row[16]),  
       fuel_oil_postretrofit=self.decCleaner(row[17]),  
       district_heating_oil_usage_postretrofit=self.decCleaner(row[18]),  
       electricity_postretrofit=self.decCleaner(row[19]),  
       natural_gas_postretrofit=self.decCleaner(row[20]),  
       propane_postretrofit=self.decCleaner(row[21]),  
       biomass_postretrofit=self.decCleaner(row[22]),  
       retrofit_cost=self.decCleaner(row[23]),  
       biomass_heat=self.toTrueFalse(row[24]),  
       heat_recovery=self.toTrueFalse(row[25])  
      )  
      retrofit_bulk_list.append(retrofit_data_to_add) 

     if self.dry_run is False:  
      Building.objects.bulk_create(retrofit_bulk_list)  
dataimport.register(BuildingInventoryImporterV2) 

出现问题。据我的理解,bulk_create()在被调用时不会分配AutoField pk,因此您需要将bulk_create()数据放入数据库中,然后才会分配AutoField pk。但是,这似乎并不准确。运行导入后,我收到以下错误:

Traceback: 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response 
     111.      response = wrapped_callback(request, *callback_args, **callback_kwargs) 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapped_view 
     105.      response = view_func(request, *args, **kwargs) 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/views/decorators/cache.py" in _wrapped_view_func 
     52.   response = view_func(request, *args, **kwargs) 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/admin/sites.py" in inner 
     206.    return view(request, *args, **kwargs) 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/auth/decorators.py" in _wrapped_view 
     21.     return view_func(request, *args, **kwargs) 
    File "/home/bhernandez/ISER/aedg/core/adminviews.py" in data_import 
     465.     results = importer.run() 
    File "/home/bhernandez/ISER/aedg/core/dataimport/__init__.py" in run 
     114.   self.do_import() 
    File "/home/bhernandez/ISER/aedg/akw/dataimport/etc.py" in do_import 
     656.     building_id=Building.objects.get(id=temp_id),  
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/manager.py" in manager_method 
     92.     return getattr(self.get_queryset(), name)(*args, **kwargs) 
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/query.py" in get 
     357.     self.model._meta.object_name) 

    Exception Type: DoesNotExist at /admin/core/datafile/174/import/ 
    Exception Value: Building matching query does not exist. 

但是,当我检查我的Buildings table it's been populated...任何帮助或建议是非常赞赏。

+0

倒数第二行不应该是'RetrofitData.objects.bulk_create(retrofit_bulk_list)'而不是'Building.objects.bulk_create(building_bulk_list)''因为你已经完成了创建? –

+0

我认为你的第二个bulk_create行有一个复制/粘贴错误。 – Gocht

+0

@marksweb感谢指出,修正了,但我仍然遇到同样的问题。数据将填充Building表,但RetroFitData部分中的get查询失败。 –

回答

0

因此,当您创建RetrofitData时,您需要知道刚刚创建的Building对象的ID。

您正在使用ID字段设置为自动增量的数据库的机会,因此使用bulk_create创建的对象将不会获得PK分配。

我想使用building_bulk_list你可以使用它的长度从数据库中获取的最后一组Building对象,但为什么不创建building_bulk_list用于创建对象的一个​​更传统的方法,调用save(),使您可以创建一个列表的ID?

然后使用该ID列表,您可以运行bulk_createRetrofitData,遍历该ID列表以建立关系到Building

相关问题