-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyspark-test.py
46 lines (32 loc) · 1.05 KB
/
pyspark-test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# pyspark
import pyspark
from pyspark.sql import SparkSession
# Start (or reuse) a Spark session running in local mode with one worker
# thread; the application name is what shows up in the Spark UI and logs.
spark = (
    SparkSession.builder
    .master('local[1]')
    .appName('AudioValidator.com')
    .getOrCreate()
)
# SparkSession exposes its SparkContext as `sparkContext`; there is no
# `session` attribute, so the previous `spark.session` raised AttributeError.
# The context is needed further down for `parallelize`.
sc = spark.sparkContext
# Import modules
import os, sys, boto3
import json
import matplotlib.pyplot as plt
import pandas as pd
# Audio validator
from audioValidator.generator import generator
from audioValidator.results import results
from audioValidator.comparator import comparator
from audioValidator.utils import utils
# S3 configuration: target bucket name and the boto3 client used to reach it.
bucket = 'band-cloud-audio-validation'
s3_client = boto3.client(service_name='s3')
# Configure analysis list: one (trackName, fileName) tuple per entry found
# directly inside dataDir. `fileName` is the bare directory entry, not a
# full path.
dataDir = '/home/hadoop/examples'
toDo = []
for track in os.listdir(dataDir):
    # Strip only a trailing '.wav' extension. The previous
    # track.replace('.wav', '') removed every occurrence of '.wav', so a
    # name such as 'a.wav.backup.wav' was mangled. Entries without a '.wav'
    # extension keep their full name, as before.
    stem, extension = os.path.splitext(track)
    trackName = stem if extension == '.wav' else track
    toDo.append((trackName, track))
# Run the classifier over every (trackName, fileName) tuple through Spark and
# collect the per-track results back onto the driver as a list.
# Serial equivalent, handy for debugging without Spark:
#   list(map(utils.classifyAudioSignal_fromTuple, toDo))
toDo_spark = sc.parallelize(toDo)
output = toDo_spark.map(utils.classifyAudioSignal_fromTuple).collect()

# Persist the collected results locally and upload that file to S3. The
# previous upload call referenced `file_name` and `object_name` without ever
# defining them, so the script failed with NameError once the Spark job had
# already finished, and `output` was never saved anywhere.
# NOTE(review): the results file name / S3 key below is a placeholder choice;
# confirm the naming the downstream consumers expect.
file_name = 'audio-validation-results.json'
object_name = file_name
with open(file_name, 'w', encoding='utf-8') as results_file:
    # NOTE(review): the classifier's return type is not visible from this
    # script. default=str is a deliberate fallback so non-JSON-native values
    # are stringified instead of aborting the upload; replace it with an
    # explicit serializer once the result schema is confirmed.
    json.dump(output, results_file, default=str)

# upload_file signals failure by raising, so there is no response to keep.
s3_client.upload_file(file_name, bucket, object_name)