首页 文章

通过主题标签和时间范围查询Instagram帖子

提问于
浏览
11

我正在尝试通过提供主题标签和时间范围(从日期到日期)来查询Instagram中的帖子 . 我用的是recent tags endpoint .

https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN

我的代码是使用instagram-node库在Node.js中编写的(请参阅内联注释):

// Project configuration; it supplies the Instagram access token used below
var config = require('../config.js');

// Load instagram-node and build a client instance from it
var instagramNode = require('instagram-node');
var ig = instagramNode.instagram();

// Hand the configured access token to the client
ig.use({ access_token: config.instagram.access_token });

// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {

  // Create the posts array (will be concated with new posts from pagination responses)
  var posts = [];

  // Convert the date objects into timestamps (seconds)
  var sinceTime = Math.floor(minDate.getTime() / 1000);
  var untilTime = Math.floor(maxDate.getTime() / 1000);

  // Fetch the IG posts page by page
  ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {

    // Handle error
    if (err) {
      return callback(err);
    }

    // Manually filter by time
    var filteredByTime = medias.filter(function (currentPost) {
      // Convert the created_time string into number (seconds timestamp)
      var createdTime = +currentPost.created_time;

      // Check if it's after since date and before until date
      return createdTime >= sinceTime && createdTime <= untilTime;
    });

    // Get the last post on this page
    var lastPost = medias[medias.length - 1] || {};

    // ...and its timestamp
    var lastPostTimeStamp = +(lastPost.created_time || -1);

    // ...and its timestamp date object
    var lastPostDate = new Date(lastPostTimeStamp * 1000);

    // Concat the new [filtered] posts to the big array
    posts = posts.concat(filteredByTime);

    // Show some output
    console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);


    // Check if the last post is BEFORE until date and there are no new posts in the provided range
    if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
      // ...if so, we can callback!
      return callback(null, posts);
    }

    // Navigate to the next page
    pagination.next(fetchPosts);
  });
};

这段代码会从最新的帖子开始,向更早的帖子逐页获取,并手动按 created_time 进行过滤 . 这是可行的,但效率非常低:如果我们想获取一年前的帖子,就必须一页一页地迭代到那个时间点,这会消耗大量请求(可能超过每小时5000次的速率限制) .

有没有更好的方法来进行此查询?如何通过提供标签和时间范围来获取Instagram帖子?

2 回答

  • 2

    我认为这是你正在寻找的基本想法 . 我对Node.js并不是很熟悉,所以这都是普通的javascript . 你必须修改它以满足你的需要,并可能从中做出一个功能 .

    我们的想法是将Instagram ID(在此示例中为1116307519311125603)转换为日期,反之亦然,这样您就可以直接定位到特定的时间点,而不必回溯所有结果直到找到所需的时间戳 . ID中下划线'_'及其后面的部分应该去掉,因为(如果我没记错的话)那部分指的是用户 . 示例中有4个函数,希望对您有所帮助 .

    快乐的黑客!

    // static: common durations expressed in seconds
    // (the month value equals 30 days, the year value equals 365.25 days)
    var epoch_hour = 3600;
    var epoch_day = 86400;
    var epoch_month = 2592000;
    var epoch_year = 31557600;

    // you'll need to set this part up/integrate it with your code
    // NOTE(review): this id literal is larger than Number.MAX_SAFE_INTEGER, so it
    // is stored as the nearest representable double rather than digit-for-digit.
    var dataId = 1116307519311125603;
    // Width of the time window, in seconds
    var range = 2 * epoch_hour;
    var count = 1;
    var tagName = 'cars';
    // The access token is requested interactively
    var access = prompt('Enter access token:');
    // Recent-media endpoint for the tag, carrying the access token
    var baseUrl = 'https://api.instagram.com/v1/tags/' + tagName;
    baseUrl += '/media/recent?access_token=' + access;
    
    //date && id utilities
    // Converts a numeric media/tag id into a Unix timestamp in whole seconds.
    // n: the id as a Number; returns the rounded number of seconds.
    // NOTE(review): 0.008388608 equals 2^23 / 10^9, which presumably mirrors an
    // id layout of "milliseconds since a custom epoch, shifted left by 23 bits";
    // the additive constant then plays the role of that custom epoch - confirm.
    function idToEpoch(n){
      var scaledId = n / 1000000000000;
      var shifted = scaledId + 11024476.5839159095;
      var seconds = shifted / 0.008388608;
      return Math.round(seconds);
    }
    
    // Converts a Unix timestamp in seconds into the corresponding numeric id
    // (the inverse of the id-to-seconds conversion, up to rounding).
    // n: seconds since the Unix epoch; returns the id as a rounded Number.
    // NOTE(review): results are far above Number.MAX_SAFE_INTEGER, so the low
    // digits of the returned id are approximate.
    function epochToId(n){
      var scaled = n * 0.008388608;
      var shifted = scaled - 11024476.5839159095;
      var id = shifted * 1000000000000;
      return Math.round(id);
    }
    
    // Builds a Date for a Unix timestamp given in seconds.
    // n: seconds since 1970-01-01T00:00:00Z; any fractional part is dropped
    //    (truncated toward zero) before the Date is created.
    function newDateFromEpoch(n){
      return new Date(Math.trunc(n) * 1000);
    }
    
    // Returns the Unix timestamp of a Date in whole seconds; the millisecond
    // component is discarded, i.e. the value is rounded down to the second.
    // d: a Date object.
    function dateToEpoch(d){
      var millis = d.getTime();
      return Math.floor(millis / 1000);
    }
    
    // start with your id and range; do the figuring
    // The window starts at the second derived from dataId and ends `range`
    // seconds later; both ends are kept as ids and as Date objects.
    var epoch_time = idToEpoch(dataId);
    var minumumId = epochToId(epoch_time);
    var maximumId = epochToId(epoch_time + range);
    var minDate = newDateFromEpoch(epoch_time);
    var maxDate = newDateFromEpoch(epoch_time + range);

    // Request URL: the base endpoint plus the page size and the id bounds
    var newUrl = baseUrl;
    newUrl += '&count=' + count;
    newUrl += '&min_tag_id=' + minumumId;
    newUrl += '&max_tag_id=' + maximumId;
    
    
    //used for testing
    /*alert('Start: ' + minDate + ' (' + epoch_time + 
            ')\nEnd: ' + maxDate + ' (' + (epoch_time +
            range) + ')');
    window.location = newUrl;*/
    
  • 6

    作为对上面这个优秀答案的补充,下面是通过PL/pgSQL函数生成Instagram风格ID的方法:

    -- Generates a 64-bit id composed of three fields:
    --   * milliseconds elapsed since our_epoch, shifted left by 23 bits
    --   * the shard id, shifted left by 10 bits
    --   * the next value of insta5.table_id_seq modulo 1024 (low 10 bits)
    -- Returns the id through the OUT parameter "result".
    CREATE OR REPLACE FUNCTION insta5.next_id(OUT result bigint) AS $$
    DECLARE
        -- Custom epoch in milliseconds; timestamps are stored relative to it
        our_epoch bigint := 1314220021721;
        seq_id bigint;
        now_millis bigint;
        -- Shard identifier baked into every id produced by this function
        shard_id int := 5;
    BEGIN
        -- Modulo is the single-percent operator. A doubled percent sign is only
        -- meaningful when the statement first passes through a client-side
        -- format string; sent to the server as-is it is not a valid operator.
        SELECT nextval('insta5.table_id_seq') % 1024 INTO seq_id;

        -- Current wall-clock time in milliseconds since the Unix epoch
        SELECT FLOOR(EXTRACT(EPOCH FROM clock_timestamp()) * 1000) INTO now_millis;
        result := (now_millis - our_epoch) << 23;
        result := result | (shard_id << 10);
        result := result | (seq_id);
    END;
    $$ LANGUAGE PLPGSQL;
    

    来自Instagram's blog

相关问题