Using Java API to access Google Search Console Data

This blog post talks about ingesting Google Search Console data using the Java API.

There are various kinds of reports available in Google Search Console (GSC), e.g. Search Analytics, Sitemaps, Sites, URL crawl errors/metrics, etc.

The API endpoints are defined in :
https://developers.google.com/apis-explorer/?hl=en_US#p/webmasters/v3/

Before you begin setting up your Java environment, make sure you have access to the Google Search Console dashboard and also have admin access to create the service account key for API access.

Steps to create service account for API access :


1. Go to API Manager : https://console.developers.google.com/apis/credentials

2. Select your project from the dropdown list.

3. Click on "Create Credentials" and then select "Service Account Key"

4. Provide a service account name and select "P12" as the key type.


5. Note the email id of this service account and save the P12 file once you have created the new service account.
6. Now go to the Google Search Console dashboard : https://www.google.com/webmasters/tools/search-analytics?hl=en

7. Select a property and you should be seeing the following window :

8. Now go to Settings ->  Users and Property Owners.

9. Click "ADD A NEW USER"



10. Paste the email id of the service account you created and grant it Full permission.

Creating Java API client :

I am using a Maven project in Eclipse to build the API client.
Disclaimer : I am not talking about best practices, it's just a simple way to access the API data.

The dependencies defined in pom.xml are :

<dependencies>
        <!-- Core Google API client -->
        <dependency>
            <groupId>com.google.api-client</groupId>
            <artifactId>google-api-client</artifactId>
            <version>1.22.0</version>
        </dependency>

        <!-- Google Search Console API dependencies -->
        <dependency>
            <groupId>com.google.apis</groupId>
            <artifactId>google-api-services-webmasters</artifactId>
            <version>v3-rev24-1.22.0</version>
        </dependency>

        <!-- NOTE(review): this artifact targets client line 1.19.0 while the
             others target 1.22.0; confirm the mix is intentional. -->
        <dependency>
            <groupId>com.google.apis</groupId>
            <artifactId>google-api-services-oauth2</artifactId>
            <version>v2-rev75-1.19.0</version>
        </dependency>

        <dependency>
            <groupId>com.google.oauth-client</groupId>
            <artifactId>google-oauth-client-jetty</artifactId>
            <version>1.22.0</version>
        </dependency>

        <!-- Third party dependencies -->
        <dependency>
            <groupId>com.google.http-client</groupId>
            <artifactId>google-http-client-jackson2</artifactId>
            <version>1.22.0</version>
        </dependency>

        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.1</version>
        </dependency>
</dependencies>

Search Analytics Query Report

public class SearchAnalyticsQueryReport {

    private static final Log LOGGER = LogFactory.getLog(SearchAnalyticsQueryReport.class);

    public static void main(String[] args) throws Exception {

        String reportstartDt = args[0]; // Format : YYYY-MM-DD
        String reportEndDt   = args[1]; // Format : YYYY-MM-DD
        String outputPath    = args[2];

        Webmasters service = GoogleSearchConsoleUtils.getGSCservice();
        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outputPath)));

        SitesListResponse siteList = service.sites().list().execute();
        if (siteList.isEmpty()) {
            LOGGER.info("Site list is empty!");
            writer.close();
            return;
        }

        String siteUrl;
        for (WmxSite wmxSite : siteList.getSiteEntry()) {
            siteUrl = wmxSite.getSiteUrl();
            getSearchAnalyticsReport(service, writer, siteUrl, reportstartDt, reportEndDt);
        }

        writer.close();
        return;
    }

    /**
     * Generates the Search Analytics Report for the given input dates and Site
     * URL.
     * 
     * @param service
     * @param writer
     * @param siteUrl
     * @param reportstartDt
     * @param reportEndDt
     * @throws Exception
     */
    private static void getSearchAnalyticsReport(Webmasters service, BufferedWriter writer, String siteUrl, String reportstartDt, String reportEndDt) throws Exception {

        SearchAnalyticsQueryRequest saqRequest = createSearchAnalyticsQueryRequest(reportstartDt, reportEndDt);
        LOGGER.info("SITE URL   : " + siteUrl);
        LOGGER.info("PARAMETERS : " + saqRequest.toPrettyString());
        SearchAnalyticsQueryResponse saqResponse = query(service, siteUrl, saqRequest);
        List<ApiDataRow> rowsList = saqResponse.getRows();

        StringBuffer sb = new StringBuffer();

        if (rowsList == null) {
            LOGGER.info("No records for the site : " + siteUrl);
            return;
        }

        for (ApiDataRow row : rowsList) {
            List<String> dimensionList = row.getKeys();
            sb.setLength(0);

            sb.append(siteUrl);
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            for (String dim : dimensionList) {
                sb.append(dim);
                sb.append(GoogleSearchConsoleConstants.CTRL_A);
            }

            sb.append(row.getClicks());
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            sb.append(row.getCtr());
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            sb.append(row.getImpressions());
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            sb.append(row.getPosition());

            writer.write(sb.toString());
            writer.newLine();
        }

        writer.flush();
    }

    /**
     * Returns the Search Query Request for the specified report dates.
     * 
     * @param reportstartDt
     * @param reportEndDt
     * @return query
     * @throws IOException
     */
    private static SearchAnalyticsQueryRequest createSearchAnalyticsQueryRequest(String reportstartDt, String reportEndDt) throws IOException {

        SearchAnalyticsQueryRequest searchQueryRequest = new SearchAnalyticsQueryRequest();

        searchQueryRequest.setStartDate(reportstartDt);
        searchQueryRequest.setEndDate(reportEndDt);

        List<String> dimensions = new ArrayList<String>();
        dimensions.add("date");
        dimensions.add("page");
        dimensions.add("query");
        dimensions.add("country");
        dimensions.add("device");
        searchQueryRequest.setDimensions(dimensions);

        return searchQueryRequest;
    }

    /**
     * Returns the response for the entered request.
     * 
     * @param service
     * @param site
     * @param searQueryRequest
     * @return SearchAnalyticsQueryResponse
     * @throws Exception
     */
    public static SearchAnalyticsQueryResponse query(Webmasters service, String site,
            SearchAnalyticsQueryRequest searQueryRequest) throws Exception {

        Webmasters.Searchanalytics.Query query = service.searchanalytics().query(site, searQueryRequest);
        SearchAnalyticsQueryResponse queryResponse = query.execute();

        return queryResponse;
    }

}

URL Crawl Error Report

public class UrlCrawlErrorsReport {

    private static final Log LOGGER = LogFactory.getLog(UrlCrawlErrorsReport.class);

    public static void main(String[] args) throws Exception {

        String reportstartDt = args[0];
        String reportEndDt   = args[1];
        String outputPath    = args[2];

        Webmasters service = GoogleSearchConsoleUtils.getGSCservice();
        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outputPath)));

        SitesListResponse siteList = service.sites().list().execute();
        if (siteList.isEmpty()) {
            LOGGER.info("Site list is empty!");
        }

        String siteUrl;
        for (WmxSite wmxSite : siteList.getSiteEntry()) {
            siteUrl = wmxSite.getSiteUrl();
            getUrlCrawlReport(service, writer, siteUrl, reportstartDt, reportEndDt);
        }
        writer.close();
    }

    /**
     * Generates the Search Analytics Report for the given input dates and Site
     * URL.
     * 
     * @param service
     * @param writer
     * @param siteUrl
     * @param reportstartDt
     * @param reportEndDt
     * @throws Exception
     */
    private static void getUrlCrawlReport(Webmasters service, BufferedWriter writer, String siteUrl, String reportstartDt, String reportEndDt) throws Exception {

        LOGGER.info("SITE URL   : " + siteUrl);
        UrlCrawlErrorsCountsQueryResponse saqResponse = query(service, siteUrl);

        StringBuffer sb = new StringBuffer();
        for (UrlCrawlErrorCountsPerType record : saqResponse.getCountPerTypes()) {
            sb.setLength(0);

            sb.append(siteUrl);
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            sb.append(record.getPlatform());
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            sb.append(record.getCategory());
            sb.append(GoogleSearchConsoleConstants.CTRL_A);

            List<UrlCrawlErrorCount> entriesList = record.getEntries();

            for (UrlCrawlErrorCount entry : entriesList) {
                sb.append(entry.getCount());
                sb.append(GoogleSearchConsoleConstants.CTRL_A);

                sb.append(entry.getTimestamp());
            }

            writer.write(sb.toString());
            writer.newLine();
        }

        writer.flush();
    }

    /**
     * Returns the response for the entered request.
     * 
     * @param service
     * @param site
     * @return UrlCrawlErrorsCountsQueryResponse
     * @throws Exception
     */
    public static UrlCrawlErrorsCountsQueryResponse query(Webmasters service, String site) throws Exception {

        Webmasters.Urlcrawlerrorscounts.Query query = service.urlcrawlerrorscounts().query(site);
        UrlCrawlErrorsCountsQueryResponse queryResponse = query.execute();

        return queryResponse;
    }

}

Utils Class

public class GoogleSearchConsoleUtils {

    /**
     * Sets the connect and read timeouts in milliseconds for all requests.
     * 
     * @param requestInitializer
     * @return
     */
    private static HttpRequestInitializer setHttpTimeout(final HttpRequestInitializer requestInitializer) {
        return new HttpRequestInitializer() {
            @Override
            public void initialize(HttpRequest httpRequest) throws IOException {
                requestInitializer.initialize(httpRequest);
                httpRequest.setConnectTimeout(3 * 60000); // 3 min connect
                                                          // timeout
                httpRequest.setReadTimeout(3 * 60000); // 3 min read timeout
            }
        };
    }

    /**
     * Returns the Webmasters service.
     * 
     * @return Webmasters
     * @throws GeneralSecurityException
     * @throws IOException
     * @throws URISyntaxException
     */
    public static Webmasters getGSCservice() throws GeneralSecurityException, IOException, URISyntaxException {
        HttpTransport httpTransport = new NetHttpTransport();
        JsonFactory jsonFactory = new JacksonFactory();

        PrivateKey privateKey = SecurityUtils.loadPrivateKeyFromKeyStore(SecurityUtils.getPkcs12KeyStore(),
                GoogleSearchConsoleUtils.class.getResourceAsStream("/SHAPI.p12"), "notasecret", "privatekey", "notasecret");

        GoogleCredential credential = new GoogleCredential.Builder()
                                         .setTransport(httpTransport)
                                         .setJsonFactory(jsonFactory)
                                         .setServiceAccountId(GSC_ACC_EMAIL)
                                         .setServiceAccountPrivateKey(privateKey)
                                         .setServiceAccountScopes(Collections
                                            .singletonList(WebmastersScopes.WEBMASTERS))
                                         .build();

        // Create a new authorized API client
        Webmasters service = new Webmasters.Builder(httpTransport, jsonFactory, setHttpTimeout(credential))
                            .setApplicationName(GSC_APP_NAME).build();

        return service;
    }

}

Comments

Popular posts from this blog

Accessing Hbase table via Hive.

Setting Up Eclipse to run Spark using Scala