# Raw CSV text, one string per line; the first entry is the header row.
lst = [
    "col_1, col_2, col_3",
    "1, ABC, Foo1",
    "2, ABCD, Foo2",
    "3, ABCDE, Foo3",
    "4, ABCDEF, Foo4",
    "5, DEF, Foo5",
    "6, DEFGHI, Foo6",
    "7, GHI, Foo7",
    "8, GHIJKL, Foo8",
    "9, JKLMNO, Foo9",
    "10, MNO, Foo10",
]

# Turn the lines into an RDD. `sc` is not defined in this snippet --
# presumably the SparkContext provided by the shell/notebook; confirm.
full_csv = sc.parallelize(lst)

# Pair every line with its index, keep only the pairs whose index is
# greater than 0 (i.e. everything except the first line), then strip the
# index off again.
# NOTE(review): despite the name, `header_cols` ends up holding the data
# rows *without* the header line.
header_cols = (
    full_csv.zipWithIndex()
    .filter(lambda line_and_pos: line_and_pos[1] > 0)
    .map(lambda line_and_pos: line_and_pos[0])
)

# Evaluate the pipeline and fetch the remaining lines; the return value is
# not stored, so it is only visible when run in an interactive session.
header_cols.collect()
No comments:
Post a Comment