かれ4

かれこれ4個目のブログ

SimpleDBでオフセットを指定する方法(解決編)

SimpleDBの弱点

これまでSimpleDBの弱いと思っていたところそれはオフセットの指定

オフセットの指定が出来ないからページングも、大変でした。

3ページ目のデータを取るには、

select * from domain where nan = chara order by nan limit 100;

こんな感じで
1ページ目を取得して、NextTokenを使って2ページ目を取得して、
さらにそのNextTokenを使ってやっと3ページ目を取得する事ができます。
これが15ページ目だったら、、、、




RDBMSなら

select * from table limit 100 OFFSET 1400;


とやれば良かったことなのに、SimpleDBでは出来ないと言われてきました。

Webサービスで使いたくてもこれのせいで使えなかった人も多いんじゃないでしょうか?


forum でも 同じような事で悩んでいる方もいるようです。

https://forums.aws.amazon.com/message.jspa?messageID=253237#253237


でも、これの解決策を見つけました。NextTokenを自分で生成してしまえば、
一度のリクエストで2ページ目に表示するデータだろうが、15ページ目に表示するデータだろうが取放題です。


確認してみたら、中の人(@) 曰く 「ウチで挙動を保証してるかはわかんないですねえ・・・」との事なので、使う際は"自己責任"で


これを実現するためにNextTokenを色々解析しましたが、その辺の難しいことはまた後日〜解析編〜で書く(cloudpack nightで話すかも)として、
とりあえず

SimpleDBのNextTokenを生成するクラス

<?php
class SimpleDBTokenGenerator {
  public function generate_paging_token($limit, $page) {
    $paging_token = array();
    for ($i = 1; $i <= $page; $i++) {
      $paging_token[$i] = $this->generate_token(($i - 1) * $limit);
    }
    return $paging_token;
  }
  public function generate_token($offset) {
    $offset_bin = decbin($offset);
    $base = "101011001110110100000000000001010111001101110010000000000010011101100011011011110110110100101110" .
            "011000010110110101100001011110100110111101101110001011100111001101100100011100110010111001010001" .
            "011101010110010101110010011110010101000001110010011011110110001101100101011100110111001101101111" .
            "011100100010111001001101011011110111001001100101010101000110111101101011011001010110111011101011" .
            "011010011100010111001011100111001000001101001101101010110000001100000000000010110100100100000000" .
            "000101000110100101101110011010010111010001101001011000010110110001000011011011110110111001101010" .
            "011101010110111001100011011101000100100101101110011001000110010101111000010110100000000000001110" .
            "011010010111001101010000011000010110011101100101010000100110111101110101011011100110010001100001" .
            "011100100111100101001010000000000000110001101100011000010111001101110100010001010110111001110100" .
            "011010010111010001111001010010010100010001011010000000000000101001101100011100100111000101000101" .
            "011011100110000101100010011011000110010101100100010010010000000000001111011100010111010101100101" .
            "011100100111100101000011011011110110110101110000011011000110010101111000011010010111010001111001" .
            "010010100000000000010011011100010111010101100101011100100111100101010011011101000111001001101001" .
            "011011100110011101000011011010000110010101100011011010110111001101110101011011010100100100000000" .
            "000010100111010101101110011010010110111101101110010010010110111001100100011001010111100001011010" .
            "000000000000110101110101011100110110010101010001011101010110010101110010011110010100100101101110" .
            "011001000110010101111000010011000000000000001101011000110110111101101110011100110110100101110011" .
            "011101000110010101101110011101000100110001010011010011100111010000000000000100100100110001101010" .
            "011000010111011001100001001011110110110001100001011011100110011100101111010100110111010001110010" .
            "011010010110111001100111001110110100110000000000000100100110110001100001011100110111010001000001" .
            "011101000111010001110010011010010110001001110101011101000110010101010110011000010110110001110101" .
            "011001010111000100000000011111100000000000000001010011000000000000001001011100110110111101110010" .
            "011101000100111101110010011001000110010101110010011101000000000000101111010011000110001101101111" .
            "011011010010111101100001011011010110000101111010011011110110111000101111011100110110010001110011" .
            "001011110101000101110101011001010111001001111001010100000111001001101111011000110110010101110011" .
            "011100110110111101110010001011110101000101110101011001010111001001111001001001000101001101101111" .
            "011100100111010001001111011100100110010001100101011100100011101101111000011100000000000000000000" .
            "000000000000000000000000" . sprintf("%064s", $offset_bin) .
            "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" .
            "00000000000000000000000000000000000000000000000001110000011100000111000001111000";
    $t = str_split($base, 6);
    $c = "";
    foreach ($t as $l) {
      $c .= $this->trans6bitASCII($l);
    }
    return $c;
  }
  private function trans6bitASCII($s) {
    $code = array(
        "A" => "000000", "B" => "000001", "C" => "000010", "D" => "000011",
        "E" => "000100", "F" => "000101", "G" => "000110", "H" => "000111",
        "I" => "001000", "J" => "001001", "K" => "001010", "L" => "001011",
        "M" => "001100", "N" => "001101", "O" => "001110", "P" => "001111",
        "Q" => "010000", "R" => "010001", "S" => "010010", "T" => "010011",
        "U" => "010100", "V" => "010101", "W" => "010110", "X" => "010111",
        "Y" => "011000", "Z" => "011001", "a" => "011010", "b" => "011011",
        "c" => "011100", "d" => "011101", "e" => "011110", "f" => "011111",
        "g" => "100000", "h" => "100001", "i" => "100010", "j" => "100011",
        "k" => "100100", "l" => "100101", "m" => "100110", "n" => "100111",
        "o" => "101000", "p" => "101001", "q" => "101010", "r" => "101011",
        "s" => "101100", "t" => "101101", "u" => "101110", "v" => "101111",
        "w" => "110000", "x" => "110001", "y" => "110010", "z" => "110011",
        "0" => "110100", "1" => "110101", "2" => "110110", "3" => "110111",
        "4" => "111000", "5" => "111001", "6" => "111010", "7" => "111011",
        "8" => "111100", "9" => "111101", "+" => "111110", "/" => "111111"
    );
    return array_search($s, $code);
  }
}

使い方

<?php
require_once "AWSSDKforPHP/sdk.class.php";


$sdb = new AmazonSDB();
$sdb->set_region(AmazonSDB::REGION_APAC_NE1);

// 普通にselect
$res = $sdb->select("select ItemName() from nextTokenHack limit 2");
var_dump($res->body->SelectResult);


$pager = new SimpleDBTokenGenerator();
// 2件づつ 5ページ分のNextTokenを取得
$tokens = $pager->generate_paging_token(2, 5);

// 自分で生成したNextTokenを使ってのSELECT
foreach ($tokens as $token) {
  $res = $sdb->select("select ItemName() from nextTokenHack limit 2 ", array('NextToken' => $token));
  var_dump($res->body->SelectResult);
}


// 自分で生成したNextTokenを使ってのオフセット指定SELECT
// 145番目のデータから取り出し始めます。
$res = $sdb->select("select ItemName() from nextTokenHack limit 2 ", array('NextToken' => $pager->generate_token(145)));
var_dump($res->body->SelectResult);

まとめ

これで、オフセットを指定してデータを取ってくる事ができるようになり、
ページングで3ページ飛ばしとかしても1回のリクエストで済むようになりました。

めでたしめでたし。