from urllib.parse import urlparse
|
|
|
|
def clean_articles(rows):
    """Return article rows with the URL's network location appended.

    Rows are read positionally as ``article_id, url, title, byline``
    (indices 0-3). Any items beyond index 3 are ignored and are not
    copied to the output.

    Args:
        rows: Iterable of indexable rows with at least four items each.
            Item 1 is the article URL; it is assumed to be a ``str``
            (TODO confirm against callers).

    Returns:
        A new list with one five-item list per input row:
        ``[article_id, url, title, byline, netloc]``. ``netloc`` is the
        ``netloc`` component reported by ``urllib.parse.urlparse`` -- the
        host plus any port or userinfo -- and is ``''`` when the URL has
        no ``//authority`` part (e.g. a bare relative path). The input
        rows are not modified.

    Raises:
        IndexError: If a row has fewer than four items.
    """
    # Read netloc straight off the parse result; the former
    # '{uri.netloc}'.format(...) round-trip yielded the same string.
    return [
        [row[0], row[1], row[2], row[3], urlparse(row[1]).netloc]
        for row in rows
    ]
|