|
|
- from urllib.parse import urlparse
-
def clean_articles(rows):
    """Append each article's URL network location to its row.

    Each input row is indexed positionally as
    ``(article_id, url, title, byline)``; only the first four columns are
    read, and any further columns are ignored.

    Args:
        rows: Iterable of indexable rows with at least four columns, where
            column 1 holds the article URL.

    Returns:
        A new list with one ``[article_id, url, title, byline, netloc]``
        list per input row, in input order. ``netloc`` is the network
        location component reported by ``urllib.parse.urlparse`` (an empty
        string when the URL has no ``//authority`` part).

    Raises:
        IndexError: If a row has fewer than four columns.
    """
    # Read ``netloc`` directly from the parse result instead of routing it
    # through ``str.format``; for string URLs the value is identical.
    return [
        [row[0], row[1], row[2], row[3], urlparse(row[1]).netloc]
        for row in rows
    ]
|