@@ -71,6 +71,7 @@ class BaseSQLToGCSOperator(BaseOperator):
71
71
If set as a sequence, the identities from the list must grant
72
72
Service Account Token Creator IAM role to the directly preceding identity, with first
73
73
account from the list granting this role to the originating account (templated).
74
+ :param exclude_columns: set of columns to exclude from transmission
74
75
"""
75
76
76
77
template_fields : Sequence [str ] = (
@@ -103,9 +104,13 @@ def __init__(
103
104
gcp_conn_id : str = 'google_cloud_default' ,
104
105
delegate_to : Optional [str ] = None ,
105
106
impersonation_chain : Optional [Union [str , Sequence [str ]]] = None ,
107
+ exclude_columns = None ,
106
108
** kwargs ,
107
109
) -> None :
108
110
super ().__init__ (** kwargs )
111
+ if exclude_columns is None :
112
+ exclude_columns = set ()
113
+
109
114
self .sql = sql
110
115
self .bucket = bucket
111
116
self .filename = filename
@@ -120,6 +125,7 @@ def __init__(
120
125
self .gcp_conn_id = gcp_conn_id
121
126
self .delegate_to = delegate_to
122
127
self .impersonation_chain = impersonation_chain
128
+ self .exclude_columns = exclude_columns
123
129
124
130
def execute (self , context : 'Context' ):
125
131
self .log .info ("Executing query" )
@@ -165,7 +171,9 @@ def _write_local_data_files(self, cursor):
165
171
names in GCS, and values are file handles to local files that
166
172
contain the data for the GCS objects.
167
173
"""
168
- schema = list (map (lambda schema_tuple : schema_tuple [0 ], cursor .description ))
174
+ org_schema = list (map (lambda schema_tuple : schema_tuple [0 ], cursor .description ))
175
+ schema = [column for column in org_schema if column not in self .exclude_columns ]
176
+
169
177
col_type_dict = self ._get_col_type_dict ()
170
178
file_no = 0
171
179
@@ -314,7 +322,11 @@ def _write_local_schema_file(self, cursor):
314
322
schema = self .schema
315
323
else :
316
324
self .log .info ("Starts generating schema" )
317
- schema = [self .field_to_bigquery (field ) for field in cursor .description ]
325
+ schema = [
326
+ self .field_to_bigquery (field )
327
+ for field in cursor .description
328
+ if field [0 ] not in self .exclude_columns
329
+ ]
318
330
319
331
if isinstance (schema , list ):
320
332
schema = json .dumps (schema , sort_keys = True )
0 commit comments