September 21st, 2022

Using GraphQL, HTTPX, and asyncio in H2O Wave

RSS icon RSS Category: Uncategorized [EN]

Basic project setup

  • Create an empty directory.
  • Create a script.py file.
  • Create and activate a Python virtual environment (python3 -m venv venv && source venv/bin/activate).
  • Install H2O Wave (pip install h2o-wave).
# Imports that we will eventually need.
import collections
from asyncio import ensure_future, gather, get_event_loop

from h2o_wave import data, site, ui
from httpx import AsyncClient

# Grab the page served at the "/" route.
page = site['/']

# Describe the layout: a header zone followed by three row-oriented zones.
page['meta'] = ui.meta_card(
    box='',
    title='Wave comparison',
    layouts=[
        ui.layout(
            breakpoint='xs',
            zones=[ui.zone(name='header')] + [
                ui.zone(name=zone_name, direction=ui.ZoneDirection.ROW, size=zone_size)
                for zone_name, zone_size in (
                    ('intro', '500px'),
                    ('plots1', '300px'),
                    ('plots2', '300px'),
                )
            ],
        ),
    ],
)

# Add the header card to the "header" zone.
page['header'] = ui.header_card(
    box='header',
    title='Wave competition comparison',
    subtitle="Let's see how well Wave does against its rivals.",
    image='https://wave.h2o.ai/img/h2o-logo.svg',
)

# Save the page so the cards above take effect.
page.save()
Basic project scaffolding.

Why a script and not an app?

Authentication tokens

# Access tokens for the GitHub and Twitter APIs.
# They are read from the GH_TOKEN / TWITTER_BEARER_TOKEN environment variables so
# that secrets do not have to live in the script; edit the '' fallbacks if you
# prefer to fill in your tokens directly.
import os

GH_TOKEN = os.environ.get('GH_TOKEN', '')
TWITTER_BEARER_TOKEN = os.environ.get('TWITTER_BEARER_TOKEN', '')
Setting the tokens.

GraphQL query

# GraphQL document with two parts:
#   1. the `repoFields` fragment, listing the fields requested for a repository;
#   2. an anonymous query that looks up four repositories, each under its own
#      alias (Wave, Streamlit, Dash, Shiny), and applies the fragment to each.
graphql_query = """
fragment repoFields on Repository {
  createdAt
  description
  forkCount
  homepageUrl
  openGraphImageUrl
  stargazerCount
  licenseInfo {
    name
  }
  vulnerabilityAlerts {
    totalCount
  }
  watchers {
    totalCount
  }
  latestRelease {
    createdAt
  }
}

{
  Wave: repository(name: "wave", owner: "h2oai") {
    ...repoFields
  }
  Streamlit: repository(name: "streamlit", owner: "streamlit") {
    ...repoFields
  }
  Dash: repository(name: "dash", owner: "plotly") {
    ...repoFields
  }
  Shiny: repository(name: "shiny", owner: "rstudio") {
    ...repoFields
  }
}
"""
GraphQL query.

Data fetching

def fill_github_issues(client, framework, repo, org, data, query):
    """Count open and closed issues matching ``query`` in ``org/repo``.

    Sends two requests to GitHub's issue search endpoint (one per state) and
    appends ``[framework, state, total_count]`` rows to ``data['bugs']``, the
    open row first.
    """
    scoped_query = query + f' repo:{org}/{repo}'
    states = ('open', 'closed')
    # Issue both requests before touching ``data``.
    responses = [
        client.get(
            url='https://api.github.com/search/issues',
            params={'q': f'{scoped_query} is:{state}'},
            headers={'Authorization': 'Bearer ' + GH_TOKEN},
        )
        for state in states
    ]

    # Fill the provided data dict for later consumption.
    for state, response in zip(states, responses):
        data['bugs'].append([framework, state, response.json()['total_count']])

def fill_github_data(client, data):
    """Fetch repository metadata via GitHub's GraphQL endpoint and record it.

    The ``data`` member of the response is stored under ``data['github_data']``.
    For every repository in it, a ``[name, value]`` row is appended to
    ``data['vulnerabilities']``, ``data['watchers']`` and ``data['stars']``.
    """
    # A single POST carries the whole GraphQL query as JSON.
    response = client.post(
        url='https://api.github.com/graphql',
        headers={'Authorization': 'Bearer ' + GH_TOKEN},
        json={'query': graphql_query},
    )
    repositories = response.json()['data']
    data['github_data'] = repositories
    # Each key is the alias used in the query, each value the requested fields.
    for alias, fields in repositories.items():
        data['vulnerabilities'].append([alias, fields['vulnerabilityAlerts']['totalCount']])
        data['watchers'].append([alias, fields['watchers']['totalCount']])
        data['stars'].append([alias, fields['stargazerCount']])

def fill_twitter_data(client, framework, data):
    """Record the recent tweet count for ``framework``.

    Queries Twitter's recent tweet counts endpoint with ``framework`` as the
    search query and appends ``[framework, total_tweet_count]`` to
    ``data['twitter_data']``.
    """
    response = client.get(
        url='https://api.twitter.com/2/tweets/counts/recent',
        params={'query': framework},
        headers={'Authorization': 'Bearer ' + TWITTER_BEARER_TOKEN},
    )
    # Fill the provided data dict for later consumption.
    tweet_count = response.json()['meta']['total_tweet_count']
    data['twitter_data'].append([framework, tweet_count])

def fill_stackoverflow_data(client, tag, data):
    """Record how many Stack Overflow questions carry ``tag``.

    Asks the Stack Exchange ``/tags`` endpoint for tags whose name contains
    ``tag`` and appends ``[tag, count]`` to ``data['so_data']``.

    The count comes from the tag named exactly ``tag`` when the response
    contains one, otherwise from the first returned tag, and is ``0`` when
    no tag is returned at all.
    """
    res = client.get(
        'https://api.stackexchange.com/2.2/tags',
        # Passing the values as params lets the client URL-encode them
        # (a tag such as "c++" would otherwise be mangled).
        params={'inname': tag, 'site': 'stackoverflow'},
    )
    items = res.json().get('items') or []
    # ``inname`` is a substring filter, so the first hit is not necessarily
    # the requested tag; prefer an exact name match when there is one.
    chosen = next((item for item in items if item.get('name') == tag), None)
    if chosen is None and items:
        chosen = items[0]
    # Fill the provided data dict for later consumption.
    data['so_data'].append([tag, chosen['count'] if chosen is not None else 0])
Fetching helper functions.
# Fetch data, one blocking request after another.
# Client is imported here because the imports at the top of the script only
# bring in AsyncClient.
from httpx import Client

plot_data = collections.defaultdict(list)
with Client() as client:
    label_query = 'is:issue label:bug'
    title_query = 'bug in:title'
    # Every helper appends its rows to plot_data; nothing is returned.
    fill_github_data(client, plot_data)
    fill_github_issues(client, 'H2O Wave', 'wave', 'h2oai', plot_data, label_query)
    fill_github_issues(client, 'Streamlit', 'streamlit', 'streamlit', plot_data, label_query)
    fill_github_issues(client, 'Plotly Dash', 'dash', 'plotly', plot_data, title_query)
    fill_github_issues(client, 'R Shiny', 'shiny', 'rstudio', plot_data, 'bug')
    fill_twitter_data(client, 'H2O Wave', plot_data)
    fill_twitter_data(client, 'Streamlit', plot_data)
    fill_twitter_data(client, 'Plotly Dash', plot_data)
    fill_twitter_data(client, 'R Shiny', plot_data)
    fill_stackoverflow_data(client, 'h2o-wave', plot_data)
    fill_stackoverflow_data(client, 'streamlit', plot_data)
    fill_stackoverflow_data(client, 'plotly-dash', plot_data)
    fill_stackoverflow_data(client, 'shiny', plot_data)
Synchronous fetching.
async def main():
    """Set up the page layout and the header card, then save the page."""
    # Grab the page served at the "/" route.
    page = site['/']

    # Describe the layout: a header zone followed by three row-oriented zones.
    page['meta'] = ui.meta_card(
        box='',
        title='Wave comparison',
        layouts=[
            ui.layout(
                breakpoint='xs',
                zones=[ui.zone(name='header')] + [
                    ui.zone(name=zone_name, direction=ui.ZoneDirection.ROW, size=zone_size)
                    for zone_name, zone_size in (
                        ('intro', '500px'),
                        ('plots1', '300px'),
                        ('plots2', '300px'),
                    )
                ],
            ),
        ],
    )

    # Add the header card to the "header" zone.
    page['header'] = ui.header_card(
        box='header',
        title='Wave competition comparison',
        subtitle="Let's see how well Wave does against its rivals.",
        image='https://wave.h2o.ai/img/h2o-logo.svg',
    )

    # Save the page so the cards above take effect.
    page.save()

# Run within asyncio event loop to allow concurrent HTTP calls.
# asyncio.run() creates the loop, runs main() to completion and closes the loop
# again; it replaces get_event_loop() + run_until_complete(), which is deprecated
# when no event loop is running.
import asyncio

asyncio.run(main())
Run the Wave script within the event loop.
async def fill_github_issues(client: AsyncClient, framework, repo, org, data, query):
    """Count open and closed issues matching ``query`` in ``org/repo``.

    Sends two requests to GitHub's issue search endpoint (one per state),
    awaits them together and appends ``[framework, state, total_count]`` rows
    to ``data['bugs']``, the open row first.
    """
    scoped_query = query + f' repo:{org}/{repo}'
    # Both searches are awaited together; gather() returns the responses in
    # the order the requests were passed in.
    open_issues, closed_issues = await gather(*(
        client.get(
            url='https://api.github.com/search/issues',
            params={'q': f'{scoped_query} is:{state}'},
            headers={'Authorization': 'Bearer ' + GH_TOKEN},
        )
        for state in ('open', 'closed')
    ))
    for state, response in (('open', open_issues), ('closed', closed_issues)):
        data['bugs'].append([framework, state, response.json()['total_count']])

async def fill_github_data(client: AsyncClient, data):
    """Fetch repository metadata via GitHub's GraphQL endpoint and record it.

    The ``data`` member of the response is stored under ``data['github_data']``.
    For every repository in it, a ``[name, value]`` row is appended to
    ``data['vulnerabilities']``, ``data['watchers']`` and ``data['stars']``.
    """
    # A single POST carries the whole GraphQL query as JSON.
    response = await client.post(
        url='https://api.github.com/graphql',
        headers={'Authorization': 'Bearer ' + GH_TOKEN},
        json={'query': graphql_query},
    )
    repositories = response.json()['data']
    data['github_data'] = repositories
    # Each key is the alias used in the query, each value the requested fields.
    for alias, fields in repositories.items():
        data['vulnerabilities'].append([alias, fields['vulnerabilityAlerts']['totalCount']])
        data['watchers'].append([alias, fields['watchers']['totalCount']])
        data['stars'].append([alias, fields['stargazerCount']])

async def fill_twitter_data(client: AsyncClient, framework, data):
    """Record the recent tweet count for ``framework``.

    Queries Twitter's recent tweet counts endpoint with ``framework`` as the
    search query and appends ``[framework, total_tweet_count]`` to
    ``data['twitter_data']``.
    """
    response = await client.get(
        url='https://api.twitter.com/2/tweets/counts/recent',
        params={'query': framework},
        headers={'Authorization': 'Bearer ' + TWITTER_BEARER_TOKEN},
    )
    tweet_count = response.json()['meta']['total_tweet_count']
    data['twitter_data'].append([framework, tweet_count])

async def fill_stackoverflow_data(client: AsyncClient, tag, data):
    """Record how many Stack Overflow questions carry ``tag``.

    Asks the Stack Exchange ``/tags`` endpoint for tags whose name contains
    ``tag`` and appends ``[tag, count]`` to ``data['so_data']``.

    The count comes from the tag named exactly ``tag`` when the response
    contains one, otherwise from the first returned tag, and is ``0`` when
    no tag is returned at all.
    """
    res = await client.get(
        'https://api.stackexchange.com/2.2/tags',
        # Passing the values as params lets the client URL-encode them
        # (a tag such as "c++" would otherwise be mangled).
        params={'inname': tag, 'site': 'stackoverflow'},
    )
    items = res.json().get('items') or []
    # ``inname`` is a substring filter, so the first hit is not necessarily
    # the requested tag; prefer an exact name match when there is one.
    chosen = next((item for item in items if item.get('name') == tag), None)
    if chosen is None and items:
        chosen = items[0]
    data['so_data'].append([tag, chosen['count'] if chosen is not None else 0])
Convert fetching functions into async/await.
  • asyncio.ensure_future = wraps a coroutine (an async function call in our case) and returns a Python Future object, which is essentially a “promise” that the function will resolve at some later point (in the future). For JavaScript developers, this is the equivalent of a Promise.
  • asyncio.gather = helper function that takes any number of futures (passed as separate arguments) and resolves once they are all resolved. It also returns a future itself.
# Fetch data.
# NOTE: `async with` and `await` are only valid inside a coroutine, so this
# fragment belongs in the body of `async def main()`, not at module level.
# Every fill_* coroutine appends its rows to the shared plot_data dict.
plot_data = collections.defaultdict(list)
async with AsyncClient() as client:
    label_query = 'is:issue label:bug'
    title_query = 'bug in:title'
    # Wait until all requests have been fulfilled.
    await gather(
        ensure_future(fill_github_data(client, plot_data)),
        ensure_future(fill_github_issues(client, 'H2O Wave', 'wave', 'h2oai', plot_data, label_query)),
        ensure_future(fill_github_issues(client, 'Streamlit', 'streamlit', 'streamlit', plot_data, label_query)),
        ensure_future(fill_github_issues(client, 'Plotly Dash', 'dash', 'plotly', plot_data, title_query)),
        ensure_future(fill_github_issues(client, 'R Shiny', 'shiny', 'rstudio', plot_data, 'bug')),
        ensure_future(fill_twitter_data(client, 'H2O Wave', plot_data)),
        ensure_future(fill_twitter_data(client, 'Streamlit', plot_data)),
        ensure_future(fill_twitter_data(client, 'Plotly Dash', plot_data)),
        ensure_future(fill_twitter_data(client, 'R Shiny', plot_data)),
        ensure_future(fill_stackoverflow_data(client, 'h2o-wave', plot_data)),
        ensure_future(fill_stackoverflow_data(client, 'streamlit', plot_data)),
        ensure_future(fill_stackoverflow_data(client, 'plotly-dash', plot_data)),
        ensure_future(fill_stackoverflow_data(client, 'shiny', plot_data)),
    )
Fetching data concurrently.

Display the data

# Render overview cards for every framework.
for name, metadata in plot_data['github_data'].items():
    # latestRelease and licenseInfo may come back as null (None) from GitHub,
    # so guard both before reading their fields.
    release = metadata['latestRelease']
    latest_release = release['createdAt'] if release is not None else None
    license_info = metadata['licenseInfo']
    license_name = license_info['name'] if license_info is not None else 'Unknown'
    page[f'overview-{name}'] = ui.tall_article_preview_card(
        box=ui.box('intro', width='25%'),
        title=name,
        subtitle=license_name,
        image=metadata['openGraphImageUrl'],
        # Timestamps look like "<date>T<time>"; only the date part is shown.
        content=f"""
{metadata['description']}
</br></br>
**Created**: {metadata['createdAt'].split('T')[0]}
</br>
**Last release**: {latest_release.split('T')[0] if latest_release else 'Unknown'}
</br>
**Homepage**: {metadata['homepageUrl']}
        """
    )

# Render plots.
# Every chart is an interval (bar) plot with the framework on the x axis. Each
# entry lists: card name, box, title, data fields, plot_data key, and the mark
# options that differ between charts.
for card_name, card_box, card_title, fields, rows_key, mark_options in (
    ('bugs', ui.box('plots1', width='25%', order=1), 'Bugs',
     'framework state bugs', 'bugs',
     dict(y='=bugs', color='=state', dodge='auto', color_range='$red $green')),
    ('watchers', ui.box('plots1', width='25%', order=2), 'Watchers',
     'framework watchers', 'watchers',
     dict(y='=watchers', fill_color='$green')),
    ('stars', ui.box('plots1', width='25%', order=3), 'Stars',
     'framework stars', 'stars',
     dict(y='=stars', fill_color='$yellow')),
    ('vulnerabilities', ui.box('plots1', width='25%', order=4), 'Vulnerabilities',
     'framework vulns', 'vulnerabilities',
     dict(y='=vulns')),
    ('stackoverflow', 'plots2', 'Stack overflow questions',
     'framework questions', 'so_data',
     dict(y='=questions', fill_color='$orange')),
    ('twitter', 'plots2', 'Twitter tweets for the past week',
     'framework tweets', 'twitter_data',
     dict(y='=tweets', fill_color='$blue')),
):
    page[card_name] = ui.plot_card(
        box=card_box,
        title=card_title,
        data=data(fields, 4, rows=plot_data[rows_key], pack=True),
        plot=ui.plot([
            ui.mark(type='interval', x='=framework', y_min=0, **mark_options)
        ])
    )
Display the data.

Wrapping up

import collections
from asyncio import ensure_future, gather, get_event_loop

from h2o_wave import data, site, ui
from httpx import AsyncClient

# Access tokens for the GitHub and Twitter APIs.
# They are read from the GH_TOKEN / TWITTER_BEARER_TOKEN environment variables so
# that secrets do not have to live in the script; edit the '' fallbacks if you
# prefer to fill in your tokens directly.
import os

GH_TOKEN = os.environ.get('GH_TOKEN', '')
TWITTER_BEARER_TOKEN = os.environ.get('TWITTER_BEARER_TOKEN', '')

# GraphQL document with two parts:
#   1. the `repoFields` fragment, listing the fields requested for a repository;
#   2. an anonymous query that looks up four repositories, each under its own
#      alias (Wave, Streamlit, Dash, Shiny), and applies the fragment to each.
graphql_query = """
fragment repoFields on Repository {
  createdAt
  description
  forkCount
  homepageUrl
  openGraphImageUrl
  stargazerCount
  licenseInfo {
    name
  }
  vulnerabilityAlerts {
    totalCount
  }
  watchers {
    totalCount
  }
  latestRelease {
    createdAt
  }
}

{
  Wave: repository(name: "wave", owner: "h2oai") {
    ...repoFields
  }
  Streamlit: repository(name: "streamlit", owner: "streamlit") {
    ...repoFields
  }
  Dash: repository(name: "dash", owner: "plotly") {
    ...repoFields
  }
  Shiny: repository(name: "shiny", owner: "rstudio") {
    ...repoFields
  }
}
"""


async def fill_github_issues(client: AsyncClient, framework, repo, org, data, query):
    """Count open and closed issues matching ``query`` in ``org/repo``.

    Sends two requests to GitHub's issue search endpoint (one per state),
    awaits them together and appends ``[framework, state, total_count]`` rows
    to ``data['bugs']``, the open row first.
    """
    scoped_query = query + f' repo:{org}/{repo}'
    # Both searches are awaited together; gather() returns the responses in
    # the order the requests were passed in.
    open_issues, closed_issues = await gather(*(
        client.get(
            url='https://api.github.com/search/issues',
            params={'q': f'{scoped_query} is:{state}'},
            headers={'Authorization': 'Bearer ' + GH_TOKEN},
        )
        for state in ('open', 'closed')
    ))
    for state, response in (('open', open_issues), ('closed', closed_issues)):
        data['bugs'].append([framework, state, response.json()['total_count']])


async def fill_github_data(client, data):
    """Fetch repository metadata via GitHub's GraphQL endpoint and record it.

    The ``data`` member of the response is stored under ``data['github_data']``.
    For every repository in it, a ``[name, value]`` row is appended to
    ``data['vulnerabilities']``, ``data['watchers']`` and ``data['stars']``.
    """
    # A single POST carries the whole GraphQL query as JSON.
    response = await client.post(
        url='https://api.github.com/graphql',
        headers={'Authorization': 'Bearer ' + GH_TOKEN},
        json={'query': graphql_query},
    )
    repositories = response.json()['data']
    data['github_data'] = repositories
    # Each key is the alias used in the query, each value the requested fields.
    for alias, fields in repositories.items():
        data['vulnerabilities'].append([alias, fields['vulnerabilityAlerts']['totalCount']])
        data['watchers'].append([alias, fields['watchers']['totalCount']])
        data['stars'].append([alias, fields['stargazerCount']])


async def fill_twitter_data(client, framework, data):
    """Record the recent tweet count for ``framework``.

    Queries Twitter's recent tweet counts endpoint with ``framework`` as the
    search query and appends ``[framework, total_tweet_count]`` to
    ``data['twitter_data']``.
    """
    response = await client.get(
        url='https://api.twitter.com/2/tweets/counts/recent',
        params={'query': framework},
        headers={'Authorization': 'Bearer ' + TWITTER_BEARER_TOKEN},
    )
    tweet_count = response.json()['meta']['total_tweet_count']
    data['twitter_data'].append([framework, tweet_count])


async def fill_stackoverflow_data(client, tag, data):
    """Record how many Stack Overflow questions carry ``tag``.

    Asks the Stack Exchange ``/tags`` endpoint for tags whose name contains
    ``tag`` and appends ``[tag, count]`` to ``data['so_data']``.

    The count comes from the tag named exactly ``tag`` when the response
    contains one, otherwise from the first returned tag, and is ``0`` when
    no tag is returned at all.
    """
    res = await client.get(
        'https://api.stackexchange.com/2.2/tags',
        # Passing the values as params lets the client URL-encode them
        # (a tag such as "c++" would otherwise be mangled).
        params={'inname': tag, 'site': 'stackoverflow'},
    )
    items = res.json().get('items') or []
    # ``inname`` is a substring filter, so the first hit is not necessarily
    # the requested tag; prefer an exact name match when there is one.
    chosen = next((item for item in items if item.get('name') == tag), None)
    if chosen is None and items:
        chosen = items[0]
    data['so_data'].append([tag, chosen['count'] if chosen is not None else 0])


# Register page at "/" route.
# Defined at module level because main() below reads `page` as a global.
page = site['/']


async def _fetch_plot_data():
    """Query GitHub, Twitter and Stack Overflow concurrently.

    Returns the ``defaultdict(list)`` that the ``fill_*`` coroutines filled in.
    """
    plot_data = collections.defaultdict(list)
    async with AsyncClient() as client:
        label_query = 'is:issue label:bug'
        title_query = 'bug in:title'
        # Wait until all requests have been fulfilled.
        await gather(
            ensure_future(fill_github_data(client, plot_data)),
            ensure_future(fill_github_issues(client, 'H2O Wave', 'wave', 'h2oai', plot_data, label_query)),
            ensure_future(fill_github_issues(client, 'Streamlit', 'streamlit', 'streamlit', plot_data, label_query)),
            ensure_future(fill_github_issues(client, 'Plotly Dash', 'dash', 'plotly', plot_data, title_query)),
            ensure_future(fill_github_issues(client, 'R Shiny', 'shiny', 'rstudio', plot_data, 'bug')),
            ensure_future(fill_twitter_data(client, 'H2O Wave', plot_data)),
            ensure_future(fill_twitter_data(client, 'Streamlit', plot_data)),
            ensure_future(fill_twitter_data(client, 'Plotly Dash', plot_data)),
            ensure_future(fill_twitter_data(client, 'R Shiny', plot_data)),
            ensure_future(fill_stackoverflow_data(client, 'h2o-wave', plot_data)),
            ensure_future(fill_stackoverflow_data(client, 'streamlit', plot_data)),
            ensure_future(fill_stackoverflow_data(client, 'plotly-dash', plot_data)),
            ensure_future(fill_stackoverflow_data(client, 'shiny', plot_data)),
        )
    return plot_data


def _render_overview_cards(github_data):
    """Add one article-preview card per repository to the ``intro`` zone."""
    for name, metadata in github_data.items():
        # latestRelease and licenseInfo may come back as null (None) from
        # GitHub, so guard both before reading their fields.
        release = metadata['latestRelease']
        latest_release = release['createdAt'] if release is not None else None
        license_info = metadata['licenseInfo']
        license_name = license_info['name'] if license_info is not None else 'Unknown'
        page[f'overview-{name}'] = ui.tall_article_preview_card(
            box=ui.box('intro', width='25%'),
            title=name,
            subtitle=license_name,
            image=metadata['openGraphImageUrl'],
            # Timestamps look like "<date>T<time>"; only the date part is shown.
            content=f"""
{metadata['description']}
</br></br>
**Created**: {metadata['createdAt'].split('T')[0]}
</br>
**Last release**: {latest_release.split('T')[0] if latest_release else 'Unknown'}
</br>
**Homepage**: {metadata['homepageUrl']}
            """
        )


def _render_plots(plot_data):
    """Add the six bar-chart cards to the ``plots1`` and ``plots2`` zones."""
    # Every chart is an interval plot with the framework on the x axis. Each
    # entry lists: card name, box, title, data fields, plot_data key, and the
    # mark options that differ between charts.
    plot_specs = (
        ('bugs', ui.box('plots1', width='25%', order=1), 'Bugs',
         'framework state bugs', 'bugs',
         dict(y='=bugs', color='=state', dodge='auto', color_range='$red $green')),
        ('watchers', ui.box('plots1', width='25%', order=2), 'Watchers',
         'framework watchers', 'watchers',
         dict(y='=watchers', fill_color='$green')),
        ('stars', ui.box('plots1', width='25%', order=3), 'Stars',
         'framework stars', 'stars',
         dict(y='=stars', fill_color='$yellow')),
        ('vulnerabilities', ui.box('plots1', width='25%', order=4), 'Vulnerabilities',
         'framework vulns', 'vulnerabilities',
         dict(y='=vulns')),
        ('stackoverflow', 'plots2', 'Stack overflow questions',
         'framework questions', 'so_data',
         dict(y='=questions', fill_color='$orange')),
        ('twitter', 'plots2', 'Twitter tweets for the past week',
         'framework tweets', 'twitter_data',
         dict(y='=tweets', fill_color='$blue')),
    )
    for card_name, card_box, card_title, fields, rows_key, mark_options in plot_specs:
        page[card_name] = ui.plot_card(
            box=card_box,
            title=card_title,
            data=data(fields, 4, rows=plot_data[rows_key], pack=True),
            plot=ui.plot([
                ui.mark(type='interval', x='=framework', y_min=0, **mark_options)
            ])
        )


async def main():
    """Build the comparison dashboard.

    Sets up the layout and header, fetches all data concurrently, renders the
    overview cards and plots, and finally saves the page.
    """
    # Setup layout.
    page['meta'] = ui.meta_card(
        box='',
        title='Wave comparison',
        layouts=[
            ui.layout(breakpoint='xs', zones=[
                ui.zone(name='header'),
                ui.zone(name='intro', direction=ui.ZoneDirection.ROW, size='500px'),
                ui.zone(name='plots1', direction=ui.ZoneDirection.ROW, size='300px'),
                ui.zone(name='plots2', direction=ui.ZoneDirection.ROW, size='300px'),
            ]),
        ])

    # Render header.
    page['header'] = ui.header_card(
        box='header',
        title='Wave competition comparison',
        subtitle="Let's see how well Wave does against its rivals.",
        image='https://wave.h2o.ai/img/h2o-logo.svg',
    )

    # Fetch data.
    plot_data = await _fetch_plot_data()

    # Render overview cards for every framework, then the plots.
    _render_overview_cards(plot_data['github_data'])
    _render_plots(plot_data)

    page.save()


# Run within asyncio event loop to allow concurrent HTTP calls.
# asyncio.run() creates the loop, runs main() to completion and closes the loop
# again; it replaces get_event_loop() + run_until_complete(), which is deprecated
# when no event loop is running.
import asyncio

asyncio.run(main())
Full code.
Final dashboard.

About the Author

Martin Turoci

Software engineer who hates repetitive tasks and praises automation, clean code, and the "right tool for the job" rule. Also a big fan of the JavaScript ecosystem.
In my free time, I teach ballroom dancing at both competitive and social level (as a former ballroom dancer).

Leave a Reply

+
H2O Wave joins Hacktoberfest

It’s that time of the year again. A great initiative by DigitalOcean called Hacktoberfest that aims to bring

September 29, 2022 - by Martin Turoci
+
Three Keys to Ethical Artificial Intelligence in Your Organization

There’s certainly been no shortage of examples of AI gone bad over the past few

September 23, 2022 - by H2O.ai Team
+
머신러닝 자동화 솔루션 H2O Driverless AI를 이용한 뇌에서의 성차 예측

Predicting Gender Differences in the Brain Using Machine Learning Automation Solution H2O Driverless AI 아동기 뇌인지

August 29, 2022 - by H2O.ai Team
+
Make with H2O.ai Recap: Validation Scheme Best Practices

Data Scientist and Kaggle Grandmaster, Dmitry Gordeev, presented at the Make with H2O.ai session on

August 23, 2022 - by Blair Averett
+
Integrating VSCode editor into H2O Wave

Let’s have a look at how to provide our users with a truly amazing experience

August 18, 2022 - by Martin Turoci
+
5 Tips for Improving Your Wave Apps

Let’s quickly uncover a few simple tips that are quick to implement and have a

August 9, 2022 - by Martin Turoci

Start Your Free Trial