Skip to content

Commit 90ce598

Browse files
committed
Refine Wordcloud to be more universal & language agnostic
1 parent 917097e commit 90ce598

File tree

2 files changed

+34
-6
lines changed

2 files changed

+34
-6
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Words for universal Wordcloud
// Splits the "name" property of every matched node into single lowercase words
// and returns one row per word (duplicates are kept, nothing is aggregated here).
2+
3+
// Label expression: match only nodes that carry none of the listed labels.
MATCH (named:!Key&!Primitive&!PrimitiveType&!Void&!JavaType&!ResolvedDuplicateType&!ExternalType)
4+
// Keep only non-empty names; a missing name compares to null and is filtered out as well.
WHERE named.name > ''
5+
// Skip three literal names that carry no meaningful words
// (presumably Java-specific artifacts - confirm against the scanned data).
AND named.name <> 'package-info'
6+
AND named.name <> '<init>'
7+
AND named.name <> '<clinit>'
8+
// Insert a space in front of every hyphen, underscore, uppercase letter or non-word character
// that is not the first character of the name, e.g. "camelCase" becomes "camel Case".
WITH apoc.text.replace(named.name, '(?<!^)([-_A-Z\W])', ' $1') AS words
9+
// Replace the separators themselves (hyphen, underscore, non-word characters) and all digits with a space.
WITH apoc.text.replace(words, '[-_0-9\W]', ' ') AS words
10+
// Collapse every run of whitespace into one single space.
WITH apoc.text.replace(words, '\s+', ' ') AS words
11+
// Trim the ends, convert to lowercase and split at the single spaces into a list of words.
WITH split(toLower(trim(words)), ' ') AS words
12+
// Turn the list into one row per word.
UNWIND words AS word
13+
WITH word AS word
14+
// Drop empty strings and single-character words.
WHERE size(word) > 1
15+
RETURN word
16+
// Disabled aggregation: uncommenting the two lines below additionally returns
// how often each word appears, sorted by descending count and then by word.
// ,count(*) as numberOfAppearances
17+
//ORDER BY numberOfAppearances DESC, word

jupyter/Wordcloud.ipynb

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@
125125
"outputs": [],
126126
"source": [
127127
"# Query data from graph database\n",
128-
"words = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_Wordcloud.cypher\")\n",
129-
"words.head(20)"
128+
"words = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_universal_Wordcloud.cypher\")\n",
129+
"words.head(30)"
130130
]
131131
},
132132
{
@@ -141,11 +141,22 @@
141141
"number_of_words=len(words.word)\n",
142142
"print(\"There are {} words in the dataset.\".format(number_of_words))\n",
143143
"\n",
144+
"# Define stop words\n",
145+
"stopwords = set(STOPWORDS)\n",
146+
"stopwords.update(['builder', 'exception', 'abstract', 'helper', 'util', 'callback', 'factory', 'result',\n",
147+
" 'handler', 'type', 'module', 'name', 'parameter', 'lambda', 'access', 'create', 'message', \n",
148+
" 'ts', 'js', 'tsx', 'jsx', 'css', 'htm', 'html', 'props', 'use', 'id', 'ref', 'hook', 'event', \n",
149+
" 'span', 'data', 'context', 'form', 'get', 'set', 'object', 'null', 'new'])\n",
150+
"\n",
144151
"if number_of_words > 0:\n",
145-
" # Define stop words\n",
146-
" stopwords = set(STOPWORDS)\n",
147-
" stopwords.update(['builder', 'exception', 'abstract', 'helper', 'util', 'callback', 'factory', 'handler', 'repository', 'result'])\n",
148-
" wordcloud = WordCloud(stopwords=stopwords, background_color='white', colormap='viridis').generate(text)\n",
152+
" wordcloud = WordCloud(\n",
153+
" width=800, \n",
154+
" height=400,\n",
155+
" max_words=400, \n",
156+
" stopwords=stopwords,\n",
157+
" background_color='white', \n",
158+
" colormap='viridis'\n",
159+
" ).generate(text)\n",
149160
"\n",
150161
" # Plot the word cloud\n",
151162
" plot.figure(figsize=(15,10))\n",

0 commit comments

Comments
 (0)